From 69ab373dfec278c50957ac1be5331b20a05d1334 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Sun, 16 Aug 2015 20:18:19 +0000
Subject: [PATCH] transform fmin/fmax calls when possible (PR24314)

If we can ignore NaNs, fmin/fmax libcalls can become compare and select
(this is what we turn std::min / std::max into).

This IR should then be optimized in the backend to whatever is best for
any given target. E.g., x86 can use minss/maxss instructions.

This should solve PR24314:
https://llvm.org/bugs/show_bug.cgi?id=24314

Differential Revision: http://reviews.llvm.org/D11866

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@245187 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../llvm/Transforms/Utils/SimplifyLibCalls.h |   1 +
 lib/Transforms/Utils/SimplifyLibCalls.cpp    |  63 ++++++++++-
 test/Transforms/InstCombine/fast-math.ll     | 107 ++++++++++++++++++
 3 files changed, 169 insertions(+), 2 deletions(-)

diff --git a/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 41159603aae..89686fd5cbc 100644
--- a/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -131,6 +131,7 @@ private:
   Value *optimizePow(CallInst *CI, IRBuilder<> &B);
   Value *optimizeExp2(CallInst *CI, IRBuilder<> &B);
   Value *optimizeFabs(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeFMinFMax(CallInst *CI, IRBuilder<> &B);
   Value *optimizeSqrt(CallInst *CI, IRBuilder<> &B);
   Value *optimizeSinCosPi(CallInst *CI, IRBuilder<> &B);
 
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index bb03c98b492..50513642016 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1184,6 +1184,60 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
   return Ret;
 }
 
+Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
+  // If we can shrink the call to a float function rather than a double
+  // function, do that first.
+  Function *Callee = CI->getCalledFunction();
+  if ((Callee->getName() == "fmin" && TLI->has(LibFunc::fminf)) ||
+      (Callee->getName() == "fmax" && TLI->has(LibFunc::fmaxf))) {
+    Value *Ret = optimizeBinaryDoubleFP(CI, B);
+    if (Ret)
+      return Ret;
+  }
+
+  // Make sure this has 2 arguments of FP type which match the result type.
+  FunctionType *FT = Callee->getFunctionType();
+  if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+      FT->getParamType(0) != FT->getParamType(1) ||
+      !FT->getParamType(0)->isFloatingPointTy())
+    return nullptr;
+
+  // FIXME: For finer-grain optimization, we need intrinsics to have the same
+  // fast-math flag decorations that are applied to FP instructions. For now,
+  // we have to rely on the function-level attributes to do this optimization
+  // because there's no other way to express that the calls can be relaxed.
+  IRBuilder<>::FastMathFlagGuard Guard(B);
+  FastMathFlags FMF;
+  Function *F = CI->getParent()->getParent();
+  Attribute Attr = F->getFnAttribute("unsafe-fp-math");
+  if (Attr.getValueAsString() == "true") {
+    // Unsafe algebra sets all fast-math-flags to true.
+    FMF.setUnsafeAlgebra();
+  } else {
+    // At a minimum, no-nans-fp-math must be true.
+    Attr = F->getFnAttribute("no-nans-fp-math");
+    if (Attr.getValueAsString() != "true")
+      return nullptr;
+    // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
+    // "Ideally, fmax would be sensitive to the sign of zero, for example
+    // fmax(-0.0, +0.0) would return +0; however, implementation in software
+    // might be impractical."
+    FMF.setNoSignedZeros();
+    FMF.setNoNaNs();
+  }
+  B.SetFastMathFlags(FMF);
+
+  // We have a relaxed floating-point environment. We can ignore NaN-handling
+  // and transform to a compare and select. We do not have to consider errno or
+  // exceptions, because fmin/fmax do not have those.
+  Value *Op0 = CI->getArgOperand(0);
+  Value *Op1 = CI->getArgOperand(1);
+  Value *Cmp = Callee->getName().startswith("fmin") ?
+    B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
+  return B.CreateSelect(Cmp, Op0, Op1);
+}
+
 Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
   Function *Callee = CI->getCalledFunction();
 
@@ -2110,11 +2164,16 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
       return optimizeUnaryDoubleFP(CI, Builder, true);
     return nullptr;
   case LibFunc::copysign:
-  case LibFunc::fmin:
-  case LibFunc::fmax:
     if (hasFloatVersion(FuncName))
       return optimizeBinaryDoubleFP(CI, Builder);
     return nullptr;
+  case LibFunc::fminf:
+  case LibFunc::fmin:
+  case LibFunc::fminl:
+  case LibFunc::fmaxf:
+  case LibFunc::fmax:
+  case LibFunc::fmaxl:
+    return optimizeFMinFMax(CI, Builder);
   default:
     return nullptr;
   }
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index 4eebdbdfacf..06852497564 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -716,3 +716,110 @@ define fp128 @sqrt_call_squared_f128(fp128 %x) #0 {
 ; CHECK-NEXT: ret fp128 %fabs
 }
 
+; =========================================================================
+;
+; Test-cases for fmin / fmax
+;
+; =========================================================================
+
+declare double @fmax(double, double)
+declare double @fmin(double, double)
+declare float @fmaxf(float, float)
+declare float @fminf(float, float)
+declare fp128 @fmaxl(fp128, fp128)
+declare fp128 @fminl(fp128, fp128)
+
+; No NaNs is the minimum requirement to replace these calls.
+; This should always be set when unsafe-fp-math is true, but
+; alternate the attributes for additional test coverage.
+; 'nsz' is implied by the definition of fmax or fmin itself.
+attributes #1 = { "no-nans-fp-math" = "true" }
+
+; Shrink and remove the call.
+define float @max1(float %a, float %b) #0 {
+  %c = fpext float %a to double
+  %d = fpext float %b to double
+  %e = call double @fmax(double %c, double %d)
+  %f = fptrunc double %e to float
+  ret float %f
+
+; CHECK-LABEL: max1(
+; CHECK-NEXT: fcmp fast ogt float %a, %b
+; CHECK-NEXT: select {{.*}} float %a, float %b
+; CHECK-NEXT: ret
+}
+
+define float @max2(float %a, float %b) #1 {
+  %c = call float @fmaxf(float %a, float %b)
+  ret float %c
+
+; CHECK-LABEL: max2(
+; CHECK-NEXT: fcmp nnan nsz ogt float %a, %b
+; CHECK-NEXT: select {{.*}} float %a, float %b
+; CHECK-NEXT: ret
+}
+
+
+define double @max3(double %a, double %b) #0 {
+  %c = call double @fmax(double %a, double %b)
+  ret double %c
+
+; CHECK-LABEL: max3(
+; CHECK-NEXT: fcmp fast ogt double %a, %b
+; CHECK-NEXT: select {{.*}} double %a, double %b
+; CHECK-NEXT: ret
+}
+
+define fp128 @max4(fp128 %a, fp128 %b) #1 {
+  %c = call fp128 @fmaxl(fp128 %a, fp128 %b)
+  ret fp128 %c
+
+; CHECK-LABEL: max4(
+; CHECK-NEXT: fcmp nnan nsz ogt fp128 %a, %b
+; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
+; CHECK-NEXT: ret
+}
+
+; Shrink and remove the call.
+define float @min1(float %a, float %b) #1 { + %c = fpext float %a to double + %d = fpext float %b to double + %e = call double @fmin(double %c, double %d) + %f = fptrunc double %e to float + ret float %f + +; CHECK-LABEL: min1( +; CHECK-NEXT: fcmp nnan nsz olt float %a, %b +; CHECK-NEXT: select {{.*}} float %a, float %b +; CHECK-NEXT: ret +} + +define float @min2(float %a, float %b) #0 { + %c = call float @fminf(float %a, float %b) + ret float %c + +; CHECK-LABEL: min2( +; CHECK-NEXT: fcmp fast olt float %a, %b +; CHECK-NEXT: select {{.*}} float %a, float %b +; CHECK-NEXT: ret +} + +define double @min3(double %a, double %b) #1 { + %c = call double @fmin(double %a, double %b) + ret double %c + +; CHECK-LABEL: min3( +; CHECK-NEXT: fcmp nnan nsz olt double %a, %b +; CHECK-NEXT: select {{.*}} double %a, double %b +; CHECK-NEXT: ret +} + +define fp128 @min4(fp128 %a, fp128 %b) #0 { + %c = call fp128 @fminl(fp128 %a, fp128 %b) + ret fp128 %c + +; CHECK-LABEL: min4( +; CHECK-NEXT: fcmp fast olt fp128 %a, %b +; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b +; CHECK-NEXT: ret +} -- 2.34.1
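
For reference, a minimal before/after sketch of the transformation this patch
enables, consistent with the tests added above (the function name @max_example
and the attribute group number are illustrative, not part of the commit).
Given input IR such as:

  declare double @fmax(double, double)

  define double @max_example(double %a, double %b) #1 {
    %r = call double @fmax(double %a, double %b)
    ret double %r
  }

  attributes #1 = { "no-nans-fp-math" = "true" }

running instcombine should now replace the libcall with a compare and select,
roughly:

  define double @max_example(double %a, double %b) #1 {
    %cmp = fcmp nnan nsz ogt double %a, %b
    %r = select i1 %cmp, double %a, double %b
    ret double %r
  }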