Fix big-endian handling of integer-to-vector bitcasts in InstCombine

author Richard Sandiford <rsandifo@linux.vnet.ibm.com>

Mon, 12 Aug 2013 07:26:09 +0000 (07:26 +0000)

committer Richard Sandiford <rsandifo@linux.vnet.ibm.com>

Mon, 12 Aug 2013 07:26:09 +0000 (07:26 +0000)
author Richard Sandiford <rsandifo@linux.vnet.ibm.com>
Mon, 12 Aug 2013 07:26:09 +0000 (07:26 +0000)
committer Richard Sandiford <rsandifo@linux.vnet.ibm.com>
Mon, 12 Aug 2013 07:26:09 +0000 (07:26 +0000)
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp

index 361acdde81fbb65d85c2d0147c789161eb17f087..e2f64d7bceae3a78634f827cd1cd5083ce28a409 100644 (file)
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1488,12 +1488,17 @@ static unsigned getTypeSizeIndex(unsigned Value, Type *Ty) {
  /// insertions into the vector.  See the example in the comment for
  /// OptimizeIntegerToVectorInsertions for the pattern this handles.
  /// The type of V is always a non-zero multiple of VecEltTy's size.
+/// Shift is the number of bits between the lsb of V and the lsb of
+/// the vector.
  ///
  /// This returns false if the pattern can't be matched or true if it can,
  /// filling in Elements with the elements found here.
-static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
+static bool CollectInsertionElements(Value *V, unsigned Shift,
                                       SmallVectorImpl<Value*> &Elements,
-                                     Type *VecEltTy) {
+                                     Type *VecEltTy, InstCombiner &IC) {
+  assert(isMultipleOfTypeSize(Shift, VecEltTy) &&
+         "Shift should be a multiple of the element type size");
+
    // Undef values never contribute useful bits to the result.
    if (isa<UndefValue>(V)) return true;
  
@@ -1505,8 +1510,12 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
        if (C->isNullValue())
          return true;
  
+    unsigned ElementIndex = getTypeSizeIndex(Shift, VecEltTy);
+    if (IC.getDataLayout()->isBigEndian())
+      ElementIndex = Elements.size() - ElementIndex - 1;
+
      // Fail if multiple elements are inserted into this slot.
-    if (ElementIndex >= Elements.size() || Elements[ElementIndex] != 0)
+    if (Elements[ElementIndex] != 0)
        return false;
  
      Elements[ElementIndex] = V;
@@ -1522,7 +1531,7 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
      // it to the right type so it gets properly inserted.
      if (NumElts == 1)
        return CollectInsertionElements(ConstantExpr::getBitCast(C, VecEltTy),
-                                      ElementIndex, Elements, VecEltTy);
+                                      Shift, Elements, VecEltTy, IC);
  
      // Okay, this is a constant that covers multiple elements.  Slice it up into
      // pieces and insert each element-sized piece into the vector.
@@ -1533,10 +1542,11 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
      Type *ElementIntTy = IntegerType::get(C->getContext(), ElementSize);
  
      for (unsigned i = 0; i != NumElts; ++i) {
+      unsigned ShiftI = Shift+i*ElementSize;
        Constant *Piece = ConstantExpr::getLShr(C, ConstantInt::get(C->getType(),
-                                                               i*ElementSize));
+                                                                  ShiftI));
        Piece = ConstantExpr::getTrunc(Piece, ElementIntTy);
-      if (!CollectInsertionElements(Piece, ElementIndex+i, Elements, VecEltTy))
+      if (!CollectInsertionElements(Piece, ShiftI, Elements, VecEltTy, IC))
          return false;
      }
      return true;
@@ -1549,29 +1559,28 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
    switch (I->getOpcode()) {
    default: return false; // Unhandled case.
    case Instruction::BitCast:
-    return CollectInsertionElements(I->getOperand(0), ElementIndex,
-                                    Elements, VecEltTy);
+    return CollectInsertionElements(I->getOperand(0), Shift,
+                                    Elements, VecEltTy, IC);
    case Instruction::ZExt:
      if (!isMultipleOfTypeSize(
                            I->getOperand(0)->getType()->getPrimitiveSizeInBits(),
                                VecEltTy))
        return false;
-    return CollectInsertionElements(I->getOperand(0), ElementIndex,
-                                    Elements, VecEltTy);
+    return CollectInsertionElements(I->getOperand(0), Shift,
+                                    Elements, VecEltTy, IC);
    case Instruction::Or:
-    return CollectInsertionElements(I->getOperand(0), ElementIndex,
-                                    Elements, VecEltTy) &&
-           CollectInsertionElements(I->getOperand(1), ElementIndex,
-                                    Elements, VecEltTy);
+    return CollectInsertionElements(I->getOperand(0), Shift,
+                                    Elements, VecEltTy, IC) &&
+           CollectInsertionElements(I->getOperand(1), Shift,
+                                    Elements, VecEltTy, IC);
    case Instruction::Shl: {
      // Must be shifting by a constant that is a multiple of the element size.
      ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
      if (CI == 0) return false;
-    if (!isMultipleOfTypeSize(CI->getZExtValue(), VecEltTy)) return false;
-    unsigned IndexShift = getTypeSizeIndex(CI->getZExtValue(), VecEltTy);
-
-    return CollectInsertionElements(I->getOperand(0), ElementIndex+IndexShift,
-                                    Elements, VecEltTy);
+    Shift += CI->getZExtValue();
+    if (!isMultipleOfTypeSize(Shift, VecEltTy)) return false;
+    return CollectInsertionElements(I->getOperand(0), Shift,
+                                    Elements, VecEltTy, IC);
    }
  
    }
@@ -1594,12 +1603,15 @@ static bool CollectInsertionElements(Value *V, unsigned ElementIndex,
  /// Into two insertelements that do "buildvector{%inc, %inc5}".
  static Value *OptimizeIntegerToVectorInsertions(BitCastInst &CI,
                                                  InstCombiner &IC) {
+  // We need to know the target byte order to perform this optimization.
+  if (!IC.getDataLayout()) return 0;
+
    VectorType *DestVecTy = cast<VectorType>(CI.getType());
    Value *IntInput = CI.getOperand(0);
  
    SmallVector<Value*, 8> Elements(DestVecTy->getNumElements());
    if (!CollectInsertionElements(IntInput, 0, Elements,
-                                DestVecTy->getElementType()))
+                                DestVecTy->getElementType(), IC))
      return 0;
  
    // If we succeeded, we know that all of the element are specified by Elements
diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll

index 28b0e9ae3faf0288b2477ecb45fcb90c413c12c3..ed812e15f38568c09c340cacaa4a4a92a956616c 100644 (file)
--- a/test/Transforms/InstCombine/bitcast-bigendian.ll
+++ b/test/Transforms/InstCombine/bitcast-bigendian.ll
@@ -48,3 +48,44 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
  ; CHECK-NEXT:  ret float %add
  }
  
+define <2 x i32> @test4(i32 %A, i32 %B){  ; builds an i64 as (%B << 32) | %A, then bitcasts it to <2 x i32>
+  %tmp38 = zext i32 %A to i64      ; %A occupies bits 0-31 of the i64
+  %tmp32 = zext i32 %B to i64
+  %tmp33 = shl i64 %tmp32, 32      ; %B is moved up into bits 32-63
+  %ins35 = or i64 %tmp33, %tmp38   ; the two halves do not overlap, so 'or' just combines them
+  %tmp43 = bitcast i64 %ins35 to <2 x i32>
+  ret <2 x i32> %tmp43             ; expected output below: %B (high half) as element 0, %A (low half) as element 1
+  ; CHECK-LABEL: @test4(
+  ; CHECK-NEXT: insertelement <2 x i32> undef, i32 %B, i32 0
+  ; CHECK-NEXT: insertelement <2 x i32> {{.*}}, i32 %A, i32 1
+  ; CHECK-NEXT: ret <2 x i32>
+
+}
+
+define <2 x float> @test5(float %A, float %B) {  ; same shape as an integer pack, but the halves start out as floats
+  %tmp37 = bitcast float %A to i32     ; reinterpret %A's bits as an integer
+  %tmp38 = zext i32 %tmp37 to i64      ; %A's bits occupy bits 0-31 of the i64
+  %tmp31 = bitcast float %B to i32     ; reinterpret %B's bits as an integer
+  %tmp32 = zext i32 %tmp31 to i64
+  %tmp33 = shl i64 %tmp32, 32          ; %B's bits are moved up into bits 32-63
+  %ins35 = or i64 %tmp33, %tmp38       ; the two halves do not overlap, so 'or' just combines them
+  %tmp43 = bitcast i64 %ins35 to <2 x float>
+  ret <2 x float> %tmp43               ; expected output below: %B (high half) as element 0, %A (low half) as element 1
+  ; CHECK-LABEL: @test5(
+  ; CHECK-NEXT: insertelement <2 x float> undef, float %B, i32 0
+  ; CHECK-NEXT: insertelement <2 x float> {{.*}}, float %A, i32 1
+  ; CHECK-NEXT: ret <2 x float>
+}
+
+define <2 x float> @test6(float %A){
+  %tmp23 = bitcast float %A to i32              ; reinterpret %A's bits as an integer
+  %tmp24 = zext i32 %tmp23 to i64                 ; widen to i64; %A's bits occupy bits 0-31
+  %tmp25 = shl i64 %tmp24, 32                     ; move %A's bits up into bits 32-63
+  %mask20 = or i64 %tmp25, 1109917696             ; low 32 bits = 1109917696 = 0x42280000, the bit pattern of float 42.0
+  %tmp35 = bitcast i64 %mask20 to <2 x float>     ; expected output below: %A (high half) as element 0, 42.0 (low half) as element 1
+  ret <2 x float> %tmp35
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: insertelement <2 x float> undef, float %A, i32 0
+; CHECK-NEXT: insertelement <2 x float> {{.*}}, float 4.200000e+01, i32 1
+; CHECK: ret
+}
author	Richard Sandiford <rsandifo@linux.vnet.ibm.com>
	Mon, 12 Aug 2013 07:26:09 +0000 (07:26 +0000)
committer	Richard Sandiford <rsandifo@linux.vnet.ibm.com>
	Mon, 12 Aug 2013 07:26:09 +0000 (07:26 +0000)
lib/Transforms/InstCombine/InstCombineCasts.cpp		patch \| blob \| history
test/Transforms/InstCombine/bitcast-bigendian.ll		patch \| blob \| history