From 26dbe7ec18740f642febcc738e628d921aafd079 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Thu, 26 Aug 2010 22:14:59 +0000 Subject: [PATCH] optimize "integer extraction out of the middle of a vector" as produced by SRoA. This is part of rdar://7892780, but needs another xform to expose this. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112232 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineCasts.cpp | 48 ++++++++++++++----- test/Transforms/InstCombine/bitcast.ll | 25 ++++++++++ 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index ef5bbc4798d..82c359194f7 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1337,31 +1337,53 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, /// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double /// bitcast. The various long double bitcasts can't get in here. -static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC) { +static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC){ Value *Src = CI.getOperand(0); + const Type *DestTy = CI.getType(); // If this is a bitcast from int to float, check to see if the int is an // extraction from a vector. Value *VecInput = 0; + // bitcast(trunc(bitcast(somevector))) if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && isa<VectorType>(VecInput->getType())) { const VectorType *VecTy = cast<VectorType>(VecInput->getType()); - const Type *DestTy = CI.getType(); - - // If the element type of the vector doesn't match the result type, but the - // vector type's size is a multiple of the result type, bitcast it to be a - // vector type we can extract from. 
- if (VecTy->getElementType() != DestTy && - VecTy->getPrimitiveSizeInBits() % DestTy->getPrimitiveSizeInBits()==0) { - VecTy = VectorType::get(DestTy, - VecTy->getPrimitiveSizeInBits() / DestTy->getPrimitiveSizeInBits()); - VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); - } + unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); + + if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0) { + // If the element type of the vector doesn't match the result type, + // bitcast it to be a vector type we can extract from. + if (VecTy->getElementType() != DestTy) { + VecTy = VectorType::get(DestTy, + VecTy->getPrimitiveSizeInBits() / DestWidth); + VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); + } - if (VecTy->getElementType() == DestTy) return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); + } } + // bitcast(trunc(lshr(bitcast(somevector), cst)) + ConstantInt *ShAmt = 0; + if (match(Src, m_Trunc(m_LShr(m_BitCast(m_Value(VecInput)), + m_ConstantInt(ShAmt)))) && + isa<VectorType>(VecInput->getType())) { + const VectorType *VecTy = cast<VectorType>(VecInput->getType()); + unsigned DestWidth = DestTy->getPrimitiveSizeInBits(); + if (VecTy->getPrimitiveSizeInBits() % DestWidth == 0 && + ShAmt->getZExtValue() % DestWidth == 0) { + // If the element type of the vector doesn't match the result type, + // bitcast it to be a vector type we can extract from. 
+ if (VecTy->getElementType() != DestTy) { + VecTy = VectorType::get(DestTy, + VecTy->getPrimitiveSizeInBits() / DestWidth); + VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); + } + + unsigned Elt = ShAmt->getZExtValue() / DestWidth; + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt)); + } + } return 0; } diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll index c248b5e4bc8..10898397b98 100644 --- a/test/Transforms/InstCombine/bitcast.ll +++ b/test/Transforms/InstCombine/bitcast.ll @@ -35,3 +35,28 @@ define float @test2(<2 x float> %A, <2 x i32> %B) { ; CHECK-NEXT: %add = fadd float %tmp24, %tmp4 ; CHECK-NEXT: ret float %add } + +; Optimize bitcasts that are extracting other elements of a vector. This +; happens because of SRoA. +; rdar://7892780 +define float @test3(<2 x float> %A, <2 x i64> %B) { + %tmp28 = bitcast <2 x float> %A to i64 + %tmp29 = lshr i64 %tmp28, 32 + %tmp23 = trunc i64 %tmp29 to i32 + %tmp24 = bitcast i32 %tmp23 to float + + %tmp = bitcast <2 x i64> %B to i128 + %tmp1 = lshr i128 %tmp, 64 + %tmp2 = trunc i128 %tmp1 to i32 + %tmp4 = bitcast i32 %tmp2 to float + + %add = fadd float %tmp24, %tmp4 + ret float %add + +; CHECK: @test3 +; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 1 +; CHECK-NEXT: bitcast <2 x i64> %B to <4 x float> +; CHECK-NEXT: %tmp4 = extractelement <4 x float> {{.*}}, i32 2 +; CHECK-NEXT: %add = fadd float %tmp24, %tmp4 +; CHECK-NEXT: ret float %add +} -- 2.34.1