[SROA] Fix a nasty pile of bugs to do with big-endian, different alloca

author Chandler Carruth <chandlerc@gmail.com>

Wed, 22 Jul 2015 03:32:42 +0000 (03:32 +0000)

committer Chandler Carruth <chandlerc@gmail.com>

Wed, 22 Jul 2015 03:32:42 +0000 (03:32 +0000)
author Chandler Carruth <chandlerc@gmail.com>
Wed, 22 Jul 2015 03:32:42 +0000 (03:32 +0000)
committer Chandler Carruth <chandlerc@gmail.com>
Wed, 22 Jul 2015 03:32:42 +0000 (03:32 +0000)
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp

index d1a0a82b9b08e7366512e5b953cedd4e9a47d8b7..947513a365720e2efb0d58a1a0a56ac3487255d7 100644 (file)
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -1847,10 +1847,17 @@ static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset,
  static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
    if (OldTy == NewTy)
      return true;
-  if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
-    if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
-      if (NewITy->getBitWidth() >= OldITy->getBitWidth())
-        return true;
+
+  // For integer types, we can't handle any bit-width differences. This would
+  // break both vector conversions with extension and introduce endianness
+  // issues when in conjunction with loads and stores.
+  if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
+    assert(cast<IntegerType>(OldTy)->getBitWidth() !=
+               cast<IntegerType>(NewTy)->getBitWidth() &&
+           "We can't have the same bitwidth for different int types");
+    return false;
+  }
+
    if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
      return false;
    if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
@@ -1885,10 +1892,8 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
    if (OldTy == NewTy)
      return V;
  
-  if (IntegerType *OldITy = dyn_cast<IntegerType>(OldTy))
-    if (IntegerType *NewITy = dyn_cast<IntegerType>(NewTy))
-      if (NewITy->getBitWidth() > OldITy->getBitWidth())
-        return IRB.CreateZExt(V, NewITy);
+  assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
+         "Integer types must be the exact same to convert.");
  
    // See if we need inttoptr for this type pair. A cast involving both scalars
    // and vectors requires and additional bitcast.
@@ -2134,6 +2139,9 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
      if (LI->isVolatile())
        return false;
+    // We can't handle loads that extend past the allocated memory.
+    if (DL.getTypeStoreSize(LI->getType()) > Size)
+      return false;
      // Note that we don't count vector loads or stores as whole-alloca
      // operations which enable integer widening because we would prefer to use
      // vector widening instead.
@@ -2152,6 +2160,9 @@ static bool isIntegerWideningViableForSlice(const Slice &S,
      Type *ValueTy = SI->getValueOperand()->getType();
      if (SI->isVolatile())
        return false;
+    // We can't handle stores that extend past the allocated memory.
+    if (DL.getTypeStoreSize(ValueTy) > Size)
+      return false;
      // Note that we don't count vector loads or stores as whole-alloca
      // operations which enable integer widening because we would prefer to use
      // vector widening instead.
@@ -2585,6 +2596,7 @@ private:
  
      Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
                               : LI.getType();
+    const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize;
      bool IsPtrAdjusted = false;
      Value *V;
      if (VecTy) {
@@ -2592,13 +2604,27 @@ private:
      } else if (IntTy && LI.getType()->isIntegerTy()) {
        V = rewriteIntegerLoad(LI);
      } else if (NewBeginOffset == NewAllocaBeginOffset &&
-               canConvertValue(DL, NewAllocaTy, LI.getType())) {
+               NewEndOffset == NewAllocaEndOffset &&
+               (canConvertValue(DL, NewAllocaTy, TargetTy) ||
+                (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
+                 TargetTy->isIntegerTy()))) {
        LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
                                                LI.isVolatile(), LI.getName());
        if (LI.isVolatile())
          NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
-
        V = NewLI;
+
+      // If this is an integer load past the end of the slice (which means the
+      // bytes outside the slice are undef or this load is dead) just forcibly
+      // fix the integer size with correct handling of endianness.
+      if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
+        if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
+          if (AITy->getBitWidth() < TITy->getBitWidth()) {
+            V = IRB.CreateZExt(V, TITy, "load.ext");
+            if (DL.isBigEndian())
+              V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
+                                "endian_shift");
+          }
      } else {
        Type *LTy = TargetTy->getPointerTo();
        LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
@@ -2718,10 +2744,25 @@ private:
      if (IntTy && V->getType()->isIntegerTy())
        return rewriteIntegerStore(V, SI);
  
+    const bool IsStorePastEnd = DL.getTypeStoreSize(V->getType()) > SliceSize;
      StoreInst *NewSI;
      if (NewBeginOffset == NewAllocaBeginOffset &&
          NewEndOffset == NewAllocaEndOffset &&
-        canConvertValue(DL, V->getType(), NewAllocaTy)) {
+        (canConvertValue(DL, V->getType(), NewAllocaTy) ||
+         (IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
+          V->getType()->isIntegerTy()))) {
+      // If this is an integer store past the end of slice (and thus the bytes
+      // past that point are irrelevant or this is unreachable), truncate the
+      // value prior to storing.
+      if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
+        if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
+          if (VITy->getBitWidth() > AITy->getBitWidth()) {
+            if (DL.isBigEndian())
+              V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
+                                 "endian_shift");
+            V = IRB.CreateTrunc(V, AITy, "load.trunc");
+          }
+
        V = convertValue(DL, IRB, V, NewAllocaTy);
        NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
                                       SI.isVolatile());
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll

index 7c8955b28fa2d871a22264e232774f5f03d42ac2..ad2794167a5efadaf4fa6937a928e07938452438 100644 (file)
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1195,20 +1195,24 @@ entry:
    %a = alloca <{ i1 }>, align 8
    %b = alloca <{ i1 }>, align 8
  ; CHECK:      %[[a:.*]] = alloca i8, align 8
+; CHECK-NEXT: %[[b:.*]] = alloca i8, align 8
  
    %b.i1 = bitcast <{ i1 }>* %b to i1*
    store i1 %x, i1* %b.i1, align 8
    %b.i8 = bitcast <{ i1 }>* %b to i8*
    %foo = load i8, i8* %b.i8, align 1
-; CHECK-NEXT: %[[ext:.*]] = zext i1 %x to i8
-; CHECK-NEXT: store i8 %[[ext]], i8* %[[a]], align 8
-; CHECK-NEXT: {{.*}} = load i8, i8* %[[a]], align 8
+; CHECK-NEXT: %[[b_cast:.*]] = bitcast i8* %[[b]] to i1*
+; CHECK-NEXT: store i1 %x, i1* %[[b_cast]], align 8
+; CHECK-NEXT: {{.*}} = load i8, i8* %[[b]], align 8
  
    %a.i8 = bitcast <{ i1 }>* %a to i8*
    call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.i8, i8* %b.i8, i32 1, i32 1, i1 false) nounwind
    %bar = load i8, i8* %a.i8, align 1
    %a.i1 = getelementptr inbounds <{ i1 }>, <{ i1 }>* %a, i32 0, i32 0
    %baz = load i1, i1* %a.i1, align 1
+; CHECK-NEXT: %[[copy:.*]] = load i8, i8* %[[b]], align 8
+; CHECK-NEXT: store i8 %[[copy]], i8* %[[a]], align 8
+; CHECK-NEXT: {{.*}} = load i8, i8* %[[a]], align 8
  ; CHECK-NEXT: %[[a_cast:.*]] = bitcast i8* %[[a]] to i1*
  ; CHECK-NEXT: {{.*}} = load i1, i1* %[[a_cast]], align 8
  
diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll

index b5a04ca8e64a3e0ab649a57c0e2b42f65f62160c..4de7bfcb898d2609308695c6f745cb27ec6a01c6 100644 (file)
--- a/test/Transforms/SROA/big-endian.ll
+++ b/test/Transforms/SROA/big-endian.ll
@@ -112,3 +112,126 @@ entry:
  ; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert4]] to i64
  ; CHECK-NEXT: ret i64 %[[ret]]
  }
+
+define i64 @PR14132(i1 %flag) {
+; CHECK-LABEL: @PR14132(
+; Here we form a PHI-node by promoting the pointer alloca first, and then in
+; order to promote the other two allocas, we speculate the load of the
+; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8
+; alloca. While this is a bit dubious, we were asserting on trying to
+; rewrite it. The trick is that the code using the value may carefully take
+; steps to only use the not-undef bits, and so we need to at least loosely
+; support this. This test is particularly interesting because how we handle
+; a load of an i64 from an i8 alloca is dependent on endianness.
+entry:
+  %a = alloca i64, align 8
+  %b = alloca i8, align 8
+  %ptr = alloca i64*, align 8
+; CHECK-NOT: alloca
+
+  %ptr.cast = bitcast i64** %ptr to i8**
+  store i64 0, i64* %a
+  store i8 1, i8* %b
+  store i64* %a, i64** %ptr
+  br i1 %flag, label %if.then, label %if.end
+
+if.then:
+  store i8* %b, i8** %ptr.cast
+  br label %if.end
+; CHECK-NOT: store
+; CHECK: %[[ext:.*]] = zext i8 1 to i64
+; CHECK: %[[shift:.*]] = shl i64 %[[ext]], 56
+
+if.end:
+  %tmp = load i64*, i64** %ptr
+  %result = load i64, i64* %tmp
+; CHECK-NOT: load
+; CHECK: %[[result:.*]] = phi i64 [ %[[shift]], %if.then ], [ 0, %entry ]
+
+  ret i64 %result
+; CHECK-NEXT: ret i64 %[[result]]
+}
+
+declare void @f(i64 %x, i32 %y)
+
+define void @test3() {
+; CHECK-LABEL: @test3(
+;
+; This is a test that specifically exercises the big-endian lowering because it
+; ends up splitting a 64-bit integer into two smaller integers and has a number
+; of tricky aspects (the i24 type) that make that hard. Historically, SROA
+; would miscompile this by either dropping a most significant byte or least
+; significant byte due to shrinking the [4,8) slice to an i24, or by failing to
+; move the bytes around correctly.
+;
+; The magical number 34494054408 is used because it has bits set in various
+; bytes so that it is clear if those bytes fail to be propagated.
+;
+; If you're debugging this, rather than using the direct magical numbers, run
+; the IR through '-sroa -instcombine'. With '-instcombine' these will be
+; constant folded, and if the i64 doesn't round-trip correctly, you've found
+; a bug!
+;
+entry:
+  %a = alloca { i32, i24 }, align 4
+; CHECK-NOT: alloca
+
+  %tmp0 = bitcast { i32, i24 }* %a to i64*
+  store i64 34494054408, i64* %tmp0
+  %tmp1 = load i64, i64* %tmp0, align 4
+  %tmp2 = bitcast { i32, i24 }* %a to i32*
+  %tmp3 = load i32, i32* %tmp2, align 4
+; CHECK: %[[HI_EXT:.*]] = zext i32 134316040 to i64
+; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296
+; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]]
+; CHECK: %[[LO_EXT:.*]] = zext i32 8 to i64
+; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32
+; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295
+; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]]
+
+  call void @f(i64 %tmp1, i32 %tmp3)
+; CHECK: call void @f(i64 %[[LO_MERGE]], i32 8)
+  ret void
+; CHECK: ret void
+}
+
+define void @test4() {
+; CHECK-LABEL: @test4
+;
+; Much like @test3, this is specifically testing big-endian management of data.
+; Also similarly, it uses constants with particular bits set to help track
+; whether values are corrupted, and can be easily evaluated by running through
+; -instcombine to see that the i64 round-trips.
+;
+entry:
+  %a = alloca { i32, i24 }, align 4
+  %a2 = alloca i64, align 4
+; CHECK-NOT: alloca
+
+  store i64 34494054408, i64* %a2
+  %tmp0 = bitcast { i32, i24 }* %a to i8*
+  %tmp1 = bitcast i64* %a2 to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp0, i8* %tmp1, i64 8, i32 4, i1 false)
+; CHECK: %[[LO_SHR:.*]] = lshr i64 34494054408, 32
+; CHECK: %[[LO_START:.*]] = trunc i64 %[[LO_SHR]] to i32
+; CHECK: %[[HI_START:.*]] = trunc i64 34494054408 to i32
+
+  %tmp2 = bitcast { i32, i24 }* %a to i64*
+  %tmp3 = load i64, i64* %tmp2, align 4
+  %tmp4 = bitcast { i32, i24 }* %a to i32*
+  %tmp5 = load i32, i32* %tmp4, align 4
+; CHECK: %[[HI_EXT:.*]] = zext i32 %[[HI_START]] to i64
+; CHECK: %[[HI_INPUT:.*]] = and i64 undef, -4294967296
+; CHECK: %[[HI_MERGE:.*]] = or i64 %[[HI_INPUT]], %[[HI_EXT]]
+; CHECK: %[[LO_EXT:.*]] = zext i32 %[[LO_START]] to i64
+; CHECK: %[[LO_SHL:.*]] = shl i64 %[[LO_EXT]], 32
+; CHECK: %[[LO_INPUT:.*]] = and i64 %[[HI_MERGE]], 4294967295
+; CHECK: %[[LO_MERGE:.*]] = or i64 %[[LO_INPUT]], %[[LO_SHL]]
+
+  call void @f(i64 %tmp3, i32 %tmp5)
+; CHECK: call void @f(i64 %[[LO_MERGE]], i32 %[[LO_START]])
+  ret void
+; CHECK: ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1)
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll

index e97bd66d052a4da833261f7897333495f5778150..fb76548b1d18787f262236d830b4c5a761ae8e4c 100644 (file)
--- a/test/Transforms/SROA/phi-and-select.ll
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -438,26 +438,26 @@ define i64 @PR14132(i1 %flag) {
  ; steps to only use the not-undef bits, and so we need to at least loosely
  ; support this..
  entry:
-  %a = alloca i64
-  %b = alloca i8
-  %ptr = alloca i64*
+  %a = alloca i64, align 8
+  %b = alloca i8, align 8
+  %ptr = alloca i64*, align 8
  ; CHECK-NOT: alloca
  
    %ptr.cast = bitcast i64** %ptr to i8**
-  store i64 0, i64* %a
-  store i8 1, i8* %b
-  store i64* %a, i64** %ptr
+  store i64 0, i64* %a, align 8
+  store i8 1, i8* %b, align 8
+  store i64* %a, i64** %ptr, align 8
    br i1 %flag, label %if.then, label %if.end
  
  if.then:
-  store i8* %b, i8** %ptr.cast
+  store i8* %b, i8** %ptr.cast, align 8
    br label %if.end
  ; CHECK-NOT: store
  ; CHECK: %[[ext:.*]] = zext i8 1 to i64
  
  if.end:
-  %tmp = load i64*, i64** %ptr
-  %result = load i64, i64* %tmp
+  %tmp = load i64*, i64** %ptr, align 8
+  %result = load i64, i64* %tmp, align 8
  ; CHECK-NOT: load
  ; CHECK: %[[result:.*]] = phi i64 [ %[[ext]], %if.then ], [ 0, %entry ]
author	Chandler Carruth <chandlerc@gmail.com>
	Wed, 22 Jul 2015 03:32:42 +0000 (03:32 +0000)
committer	Chandler Carruth <chandlerc@gmail.com>
	Wed, 22 Jul 2015 03:32:42 +0000 (03:32 +0000)
lib/Transforms/Scalar/SROA.cpp		patch \| blob \| history
test/Transforms/SROA/basictest.ll		patch \| blob \| history
test/Transforms/SROA/big-endian.ll		patch \| blob \| history
test/Transforms/SROA/phi-and-select.ll		patch \| blob \| history