From 1e3aaa8beff4a0797feb2ca2cb57668ab02a95fa Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 9 Dec 2015 18:57:16 +0000
Subject: [PATCH] [InstCombine] fold bitcasts around an extractelement (2nd
 try)

This is a redo of r255124 (reverted at r255126) with an added check for a
scalar destination type and an added test for the failure seen in Clang's
test/CodeGen/vector.c. The extra test shows a different missing optimization.

Original commit message:

Example:
  bitcast (extractelement (bitcast <2 x float> %X to <2 x i32>), 1) to float
    --->
  extractelement <2 x float> %X, i32 1

This is part of fixing PR25543:
https://llvm.org/bugs/show_bug.cgi?id=25543

The next step will be to generalize this fold:
trunc ( lshr ( bitcast X) ) -> extractelement (X)

I.e., I'm hoping to replace the existing transform of:
bitcast ( trunc ( lshr ( bitcast X)))
added by:
http://reviews.llvm.org/rL112232

with two less-specific transforms to catch the case in the bug report.

Differential Revision: http://reviews.llvm.org/D14879



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@255137 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../InstCombine/InstCombineCasts.cpp          | 39 +++++++++++++++++++
 test/Transforms/InstCombine/bitcast.ll        | 28 +++++++++----
 2 files changed, 59 insertions(+), 8 deletions(-)
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 4afe1bb243f..2ce86436411 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1715,6 +1715,42 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI,
   return Result;
 }
 
+/// Given a bitcasted vector fed into an extractelement instruction whose result
+/// is bitcasted again to a scalar type, eliminate at least one bitcast by
+/// changing the vector type of the extractelement. Returns null if no fold.
+/// Example:
+///   bitcast (extractelement (bitcast <2 x float> %X to <2 x i32>), 1) to float
+///    --->
+///   extractelement <2 x float> %X, i32 1
+static Instruction *foldBitCastExtElt(BitCastInst &BitCast, InstCombiner &IC,
+                                      const DataLayout &DL) {
+  // The result must be usable as a vector element type; this rejects vector
+  // destinations as well as scalars such as x86_mmx.
+  Type *DestType = BitCast.getType();
+  if (!VectorType::isValidElementType(DestType))
+    return nullptr;
+
+  // TODO: Create and use a pattern matcher for ExtractElementInst.
+  auto *ExtElt = dyn_cast<ExtractElementInst>(BitCast.getOperand(0));
+  if (!ExtElt || !ExtElt->hasOneUse())
+    return nullptr;
+
+  // The inner bitcast may be fed by a non-vector (e.g. i64); leave that alone.
+  Value *Src = nullptr;
+  if (!match(ExtElt->getOperand(0), m_BitCast(m_Value(Src))) ||
+      !Src->getType()->isVectorTy())
+    return nullptr;
+
+  // If the source element type is not the result type, recast the source. Use
+  // the extract's element count; pointer elements have no primitive bit width.
+  if (Src->getType()->getVectorElementType() != DestType) {
+    unsigned NumElts = ExtElt->getVectorOperandType()->getNumElements();
+    Type *NewVecType = VectorType::get(DestType, NumElts);
+    Src = IC.Builder->CreateBitCast(Src, NewVecType, "bc");
+  }
+  return ExtractElementInst::Create(Src, ExtElt->getOperand(1));
+}
+
 static Instruction *foldVecTruncToExtElt(Value *VecInput, Type *DestTy,
                                          unsigned ShiftAmt, InstCombiner &IC,
                                          const DataLayout &DL) {
@@ -1886,6 +1922,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
     }
   }
 
+  if (Instruction *I = foldBitCastExtElt(CI, *this, DL))
+    return I;
+
   if (SrcTy->isPointerTy())
     return commonPointerCastTransforms(CI);
   return commonCastTransforms(CI);
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index 2a8194e5303..5adb7c790fa 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -64,7 +64,7 @@ define float @test3(<2 x float> %A, <2 x i64> %B) {
 ; CHECK-NEXT:  ret float %add
 }
 
-; TODO: Both bitcasts are unnecessary; change the extractelement.
+; Both bitcasts are unnecessary; change the extractelement.
 
 define float @bitcast_extelt1(<2 x float> %A) {
   %bc1 = bitcast <2 x float> %A to <2 x i32>
@@ -73,13 +73,11 @@ define float @bitcast_extelt1(<2 x float> %A) {
   ret float %bc2
 
 ; CHECK-LABEL: @bitcast_extelt1(
-; CHECK-NEXT:  %bc1 = bitcast <2 x float> %A to <2 x i32>
-; CHECK-NEXT:  %ext = extractelement <2 x i32> %bc1, i32 0
-; CHECK-NEXT:  %bc2 = bitcast i32 %ext to float
+; CHECK-NEXT:  %bc2 = extractelement <2 x float> %A, i32 0
 ; CHECK-NEXT:  ret float %bc2
 }
 
-; TODO: Second bitcast can be folded into the first.
+; Second bitcast can be folded into the first.
 
 define i64 @bitcast_extelt2(<4 x float> %A) {
   %bc1 = bitcast <4 x float> %A to <2 x double>
@@ -88,12 +86,26 @@ define i64 @bitcast_extelt2(<4 x float> %A) {
   ret i64 %bc2
 
 ; CHECK-LABEL: @bitcast_extelt2(
-; CHECK-NEXT:  %bc1 = bitcast <4 x float> %A to <2 x double>
-; CHECK-NEXT:  %ext = extractelement <2 x double> %bc1, i32 1
-; CHECK-NEXT:  %bc2 = bitcast double %ext to i64
+; CHECK-NEXT:  %bc = bitcast <4 x float> %A to <2 x i64>
+; CHECK-NEXT:  %bc2 = extractelement <2 x i64> %bc, i32 1
 ; CHECK-NEXT:  ret i64 %bc2
 }
 
+; TODO: This should return %A; the fold is skipped for vector destinations.
+
+define <2 x i32> @bitcast_extelt3(<2 x i32> %A) {
+  %bc1 = bitcast <2 x i32> %A to <1 x i64>
+  %ext = extractelement <1 x i64> %bc1, i32 0
+  %bc2 = bitcast i64 %ext to <2 x i32>
+  ret <2 x i32> %bc2
+
+; CHECK-LABEL: @bitcast_extelt3(
+; CHECK-NEXT:  %bc1 = bitcast <2 x i32> %A to <1 x i64>
+; CHECK-NEXT:  %ext = extractelement <1 x i64> %bc1, i32 0
+; CHECK-NEXT:  %bc2 = bitcast i64 %ext to <2 x i32>
+; CHECK-NEXT:  ret <2 x i32> %bc2
+}
+
 define <2 x i32> @test4(i32 %A, i32 %B){
   %tmp38 = zext i32 %A to i64
   %tmp32 = zext i32 %B to i64
-- 
2.34.1