From: Chris Lattner Date: Thu, 26 Aug 2010 21:55:42 +0000 (+0000) Subject: optimize bitcast(trunc(bitcast(x))) where the result is a float and 'x' X-Git-Url: http://plrg.eecs.uci.edu/git/?a=commitdiff_plain;h=e5a1426174986951a20ec6ffd119b77a24a65706;p=oota-llvm.git optimize bitcast(trunc(bitcast(x))) where the result is a float and 'x' is a vector to be a vector element extraction. This allows clang to compile: struct S { float A, B, C, D; }; float foo(struct S A) { return A.A + A.B+A.C+A.D; } into: _foo: ## @foo ## BB#0: ## %entry movd %xmm0, %rax shrq $32, %rax movd %eax, %xmm2 addss %xmm0, %xmm2 movapd %xmm1, %xmm3 addss %xmm2, %xmm3 movd %xmm1, %rax shrq $32, %rax movd %eax, %xmm0 addss %xmm3, %xmm0 ret instead of: _foo: ## @foo ## BB#0: ## %entry movd %xmm0, %rax movd %eax, %xmm0 shrq $32, %rax movd %eax, %xmm2 addss %xmm0, %xmm2 movd %xmm1, %rax movd %eax, %xmm1 addss %xmm2, %xmm1 shrq $32, %rax movd %eax, %xmm0 addss %xmm1, %xmm0 ret ... eliminating half of the horribleness. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112227 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index dbe5200d000..ef5bbc4798d 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1335,6 +1335,35 @@ static Instruction *OptimizeVectorResize(Value *InVal, const VectorType *DestTy, return new ShuffleVectorInst(InVal, V2, Mask); } +/// OptimizeIntToFloatBitCast - See if we can optimize an integer->float/double +/// bitcast. The various long double bitcasts can't get in here. +static Instruction *OptimizeIntToFloatBitCast(BitCastInst &CI,InstCombiner &IC) { + Value *Src = CI.getOperand(0); + + // If this is a bitcast from int to float, check to see if the int is an + // extraction from a vector. 
+ Value *VecInput = 0; + if (match(Src, m_Trunc(m_BitCast(m_Value(VecInput)))) && + isa<VectorType>(VecInput->getType())) { + const VectorType *VecTy = cast<VectorType>(VecInput->getType()); + const Type *DestTy = CI.getType(); + + // If the element type of the vector doesn't match the result type, but the + // vector type's size is a multiple of the result type, bitcast it to be a + // vector type we can extract from. + if (VecTy->getElementType() != DestTy && + VecTy->getPrimitiveSizeInBits() % DestTy->getPrimitiveSizeInBits()==0) { + VecTy = VectorType::get(DestTy, + VecTy->getPrimitiveSizeInBits() / DestTy->getPrimitiveSizeInBits()); + VecInput = IC.Builder->CreateBitCast(VecInput, VecTy); + } + + if (VecTy->getElementType() == DestTy) + return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(0)); + } + + return 0; +} Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { // If the operands are integer typed then apply the integer transforms, @@ -1386,6 +1415,11 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { ((Instruction*)NULL)); } } + + // Try to optimize int -> float bitcasts. + if ((DestTy->isFloatTy() || DestTy->isDoubleTy()) && isa<IntegerType>(SrcTy)) + if (Instruction *I = OptimizeIntToFloatBitCast(CI, *this)) + return I; if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) { if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) { diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll index 88fa9a43e54..c248b5e4bc8 100644 --- a/test/Transforms/InstCombine/bitcast.ll +++ b/test/Transforms/InstCombine/bitcast.ll @@ -13,3 +13,25 @@ define i32 @test1(i64 %a) { ; CHECK: ret i32 0 } +; Optimize bitcasts that are extracting low element of vector. This happens +; because of SRoA. 
+; rdar://7892780 +define float @test2(<2 x float> %A, <2 x i32> %B) { + %tmp28 = bitcast <2 x float> %A to i64 ; <i64> [#uses=2] + %tmp23 = trunc i64 %tmp28 to i32 ; <i32> [#uses=1] + %tmp24 = bitcast i32 %tmp23 to float ; <float> [#uses=1] + + %tmp = bitcast <2 x i32> %B to i64 + %tmp2 = trunc i64 %tmp to i32 ; <i32> [#uses=1] + %tmp4 = bitcast i32 %tmp2 to float ; <float> [#uses=1] + + %add = fadd float %tmp24, %tmp4 + ret float %add + +; CHECK: @test2 +; CHECK-NEXT: %tmp24 = extractelement <2 x float> %A, i32 0 +; CHECK-NEXT: bitcast <2 x i32> %B to <2 x float> +; CHECK-NEXT: %tmp4 = extractelement <2 x float> {{.*}}, i32 0 +; CHECK-NEXT: %add = fadd float %tmp24, %tmp4 +; CHECK-NEXT: ret float %add +}