From d2153694e0fd8ab155e65b02a821df02b3f30130 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 23 Oct 2014 21:52:45 +0000 Subject: [PATCH] Handle sqrt() shrinking in SimplifyLibCalls like any other call This patch removes a chunk of special case logic for folding (float)sqrt((double)x) -> sqrtf(x) in InstCombineCasts and handles it in the mainstream path of SimplifyLibCalls. No functional change intended, but I loosened the restriction on the existing sqrt testcases to allow for this optimization even without unsafe-fp-math because that's the existing behavior. I also added a missing test case for not shrinking the llvm.sqrt.f64 intrinsic in case the result is used as a double. Differential Revision: http://reviews.llvm.org/D5919 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220514 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineCasts.cpp | 36 ------------------- lib/Transforms/Utils/SimplifyLibCalls.cpp | 19 +++++++--- .../InstCombine/double-float-shrink-1.ll | 25 +++++++++---- 3 files changed, 33 insertions(+), 47 deletions(-) diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index c16992ff626..cbcc85944a7 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1317,42 +1317,6 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { } } - // Fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) - // Note that we restrict this transformation based on - // TLI->has(LibFunc::sqrtf), even for the sqrt intrinsic, because - // TLI->has(LibFunc::sqrtf) is sufficient to guarantee that the - // single-precision intrinsic can be expanded in the backend. 
- CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0)); - if (Call && Call->getCalledFunction() && TLI->has(LibFunc::sqrtf) && - (Call->getCalledFunction()->getName() == TLI->getName(LibFunc::sqrt) || - Call->getCalledFunction()->getIntrinsicID() == Intrinsic::sqrt) && - Call->getNumArgOperands() == 1 && - Call->hasOneUse()) { - CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0)); - if (Arg && Arg->getOpcode() == Instruction::FPExt && - CI.getType()->isFloatTy() && - Call->getType()->isDoubleTy() && - Arg->getType()->isDoubleTy() && - Arg->getOperand(0)->getType()->isFloatTy()) { - Function *Callee = Call->getCalledFunction(); - Module *M = CI.getParent()->getParent()->getParent(); - Constant *SqrtfFunc = (Callee->getIntrinsicID() == Intrinsic::sqrt) ? - Intrinsic::getDeclaration(M, Intrinsic::sqrt, Builder->getFloatTy()) : - M->getOrInsertFunction("sqrtf", Callee->getAttributes(), - Builder->getFloatTy(), Builder->getFloatTy(), - NULL); - CallInst *ret = CallInst::Create(SqrtfFunc, Arg->getOperand(0), - "sqrtfcall"); - ret->setAttributes(Callee->getAttributes()); - - - // Remove the old Call. With -fmath-errno, it won't get marked readnone. 
- ReplaceInstUsesWith(*Call, UndefValue::get(Call->getType())); - EraseInstFromFunction(*Call); - return ret; - } - } - return nullptr; } diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index caae06c4211..207d3664789 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1058,7 +1058,16 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, // floor((double)floatval) -> (double)floorf(floatval) Value *V = Cast->getOperand(0); - V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); + if (Callee->isIntrinsic()) { + Module *M = CI->getParent()->getParent()->getParent(); + Intrinsic::ID IID = (Intrinsic::ID) Callee->getIntrinsicID(); + Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy()); + V = B.CreateCall(F, V); + } else { + // The call is a library call rather than an intrinsic. + V = EmitUnaryFloatFnCall(V, Callee->getName(), B, Callee->getAttributes()); + } + return B.CreateFPExt(V, B.getDoubleTy()); } @@ -1086,6 +1095,7 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { Value *V = nullptr; Value *V1 = Cast1->getOperand(0); Value *V2 = Cast2->getOperand(0); + // TODO: Handle intrinsics in the same way as in optimizeUnaryDoubleFP(). 
V = EmitBinaryFloatFnCall(V1, V2, Callee->getName(), B, Callee->getAttributes()); return B.CreateFPExt(V, B.getDoubleTy()); @@ -1267,10 +1277,9 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; - if (UnsafeFPShrink && Callee->getName() == "sqrt" && - TLI->has(LibFunc::sqrtf)) { + if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || + Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); - } // FIXME: For finer-grain optimization, we need intrinsics to have the same // fast-math flag decorations that are applied to FP instructions. For now, @@ -2010,7 +2019,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) { UnsafeFPShrink = true; } - // Next check for intrinsics. + // First, check for intrinsics. if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { if (!isCallingConvC) return nullptr; diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll index 9b422b77296..63a02bbd857 100644 --- a/test/Transforms/InstCombine/double-float-shrink-1.ll +++ b/test/Transforms/InstCombine/double-float-shrink-1.ll @@ -279,6 +279,14 @@ define float @sqrt_test(float %f) { ; CHECK: call float @sqrtf(float %f) } +define double @sqrt_test2(float %f) { + %conv = fpext float %f to double + %call = call double @sqrt(double %conv) + ret double %call +; CHECK-LABEL: sqrt_test2 +; CHECK: call double @sqrt(double %conv) +} + define float @sqrt_int_test(float %f) { %conv = fpext float %f to double %call = call double @llvm.sqrt.f64(double %conv) @@ -288,13 +296,14 @@ define float @sqrt_int_test(float %f) { ; CHECK: call float @llvm.sqrt.f32(float %f) } -define double @sqrt_test2(float %f) { +define double @sqrt_int_test2(float %f) { %conv = fpext float %f to double - %call = call double @sqrt(double %conv) + %call = call double @llvm.sqrt.f64(double %conv) ret double %call -; CHECK-LABEL: sqrt_test2 -; 
CHECK: call double @sqrt(double %conv) +; CHECK-LABEL: sqrt_int_test2 +; CHECK: call double @llvm.sqrt.f64(double %conv) } + define float @tan_test(float %f) { %conv = fpext float %f to double %call = call double @tan(double %conv) @@ -330,7 +339,12 @@ define double @tanh_test2(float %f) { declare double @tanh(double) #1 declare double @tan(double) #1 -declare double @sqrt(double) #1 + +; sqrt is a special case: the shrinking optimization +; is valid even without unsafe-fp-math. +declare double @sqrt(double) +declare double @llvm.sqrt.f64(double) + declare double @sin(double) #1 declare double @log2(double) #1 declare double @log1p(double) #1 @@ -348,6 +362,5 @@ declare double @acosh(double) #1 declare double @asin(double) #1 declare double @asinh(double) #1 -declare double @llvm.sqrt.f64(double) #1 attributes #1 = { "unsafe-fp-math"="true" } -- 2.34.1