#include "llvm/Support/CommandLine.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace PatternMatch;
}
/// \brief Check whether the overloaded unary floating point function
-/// corresponing to \a Ty is available.
+/// corresponding to \a Ty is available.
static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
LibFunc::Func DoubleFn, LibFunc::Func FloatFn,
LibFunc::Func LongDoubleFn) {
Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
if (Dst == Src) { // stpcpy(x,x) -> x+strlen(x)
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+ return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
}
// See if we can get the length of the input string.
return B.CreateSub(LHSV, RHSV, "chardiff");
}
+ // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
+ if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
+
+ IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
+ unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType);
+
+ if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment &&
+ getKnownAlignment(RHS, DL, CI) >= PrefAlignment) {
+
+ Type *LHSPtrTy =
+ IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
+ Type *RHSPtrTy =
+ IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
+
+ Value *LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
+ Value *RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
+
+ return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
+ }
+ }
+
// Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
StringRef LHSStr, RHSStr;
if (getConstantStringInfo(LHS, LHSStr) &&
// floor((double)floatval) -> (double)floorf(floatval)
if (Callee->isIntrinsic()) {
Module *M = CI->getParent()->getParent()->getParent();
- Intrinsic::ID IID = (Intrinsic::ID) Callee->getIntrinsicID();
+ Intrinsic::ID IID = Callee->getIntrinsicID();
Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
V = B.CreateCall(F, V);
} else {
Value *Callee =
M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
Op->getType(), B.getInt32Ty(), nullptr);
- CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ CallInst *CI = B.CreateCall(Callee, {One, LdExpArg});
if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
return Ret;
}
+Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
+ // If we can shrink the call to a float function rather than a double
+ // function, do that first.
+ Function *Callee = CI->getCalledFunction();
+ if ((Callee->getName() == "fmin" && TLI->has(LibFunc::fminf)) ||
+ (Callee->getName() == "fmax" && TLI->has(LibFunc::fmaxf))) {
+ Value *Ret = optimizeBinaryDoubleFP(CI, B);
+ if (Ret)
+ return Ret;
+ }
+
+ // Make sure this has 2 arguments of FP type which match the result type.
+ FunctionType *FT = Callee->getFunctionType();
+ if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ FT->getParamType(0) != FT->getParamType(1) ||
+ !FT->getParamType(0)->isFloatingPointTy())
+ return nullptr;
+
+ // FIXME: For finer-grain optimization, we need intrinsics to have the same
+ // fast-math flag decorations that are applied to FP instructions. For now,
+ // we have to rely on the function-level attributes to do this optimization
+ // because there's no other way to express that the calls can be relaxed.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ FastMathFlags FMF;
+ Function *F = CI->getParent()->getParent();
+ Attribute Attr = F->getFnAttribute("unsafe-fp-math");
+ if (Attr.getValueAsString() == "true") {
+ // Unsafe algebra sets all fast-math-flags to true.
+ FMF.setUnsafeAlgebra();
+ } else {
+ // At a minimum, no-nans-fp-math must be true.
+ Attr = F->getFnAttribute("no-nans-fp-math");
+ if (Attr.getValueAsString() != "true")
+ return nullptr;
+ // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
+ // "Ideally, fmax would be sensitive to the sign of zero, for example
+ // fmax(-0. 0, +0. 0) would return +0; however, implementation in software
+ // might be impractical."
+ FMF.setNoSignedZeros();
+ FMF.setNoNaNs();
+ }
+ B.SetFastMathFlags(FMF);
+
+ // We have a relaxed floating-point environment. We can ignore NaN-handling
+ // and transform to a compare and select. We do not have to consider errno or
+ // exceptions, because fmin/fmax do not have those.
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ Value *Cmp = Callee->getName().startswith("fmin") ?
+ B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
+ return B.CreateSelect(Cmp, Op0, Op1);
+}
+
Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
// and multiply.
// FIXME: We're not checking the sqrt because it doesn't have
// fast-math-flags (see earlier comment).
- IRBuilder<true, ConstantFolder,
- IRBuilderDefaultInserter<true> >::FastMathFlagGuard Guard(B);
+ IRBuilder<>::FastMathFlagGuard Guard(B);
B.SetFastMathFlags(I->getFastMathFlags());
// If we found a repeated factor, hoist it out of the square root and
// replace it with the fabs of that factor.
Type *ArgType = Op->getType();
Value *F =
Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType);
- Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
+ Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
return optimizeUnaryDoubleFP(CI, Builder, true);
return nullptr;
case LibFunc::copysign:
- case LibFunc::fmin:
- case LibFunc::fmax:
if (hasFloatVersion(FuncName))
return optimizeBinaryDoubleFP(CI, Builder);
return nullptr;
+ case LibFunc::fminf:
+ case LibFunc::fmin:
+ case LibFunc::fminl:
+ case LibFunc::fmaxf:
+ case LibFunc::fmax:
+ case LibFunc::fmaxl:
+ return optimizeFMinFMax(CI, Builder);
default:
return nullptr;
}
// cbrt:
// * cbrt(expN(X)) -> expN(x/3)
// * cbrt(sqrt(x)) -> pow(x,1/6)
-// * cbrt(sqrt(x)) -> pow(x,1/9)
+// * cbrt(cbrt(x)) -> pow(x,1/9)
//
// exp, expf, expl:
// * exp(log(x)) -> x
// __stpcpy_chk(x,x,...) -> x+strlen(x)
if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
Value *StrLen = EmitStrLen(Src, B, DL, TLI);
- return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+ return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
}
// If a) we don't have any length information, or b) we know this will