Prune utf8 chars in comments.

[oota-llvm.git] / lib / Transforms / Utils / SimplifyLibCalls.cpp
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp

index 2230c9814b23e4cf7c8b0e8f48cd6cdbe2b3cf25..763e3c05a9daeb6a91c536421800838292e9587c 100644 (file)
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -32,6 +32,7 @@
  #include "llvm/Support/CommandLine.h"
  #include "llvm/Analysis/TargetLibraryInfo.h"
  #include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/Local.h"
  
  using namespace llvm;
  using namespace PatternMatch;
@@ -102,7 +103,7 @@ static bool callHasFloatingPointArgument(const CallInst *CI) {
  }
  
  /// \brief Check whether the overloaded unary floating point function
-/// corresponing to \a Ty is available.
+/// corresponding to \a Ty is available.
  static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty,
                              LibFunc::Func DoubleFn, LibFunc::Func FloatFn,
                              LibFunc::Func LongDoubleFn) {
@@ -476,7 +477,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {
    Value *Dst = CI->getArgOperand(0), *Src = CI->getArgOperand(1);
    if (Dst == Src) { // stpcpy(x,x)  -> x+strlen(x)
      Value *StrLen = EmitStrLen(Src, B, DL, TLI);
-    return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+    return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
    }
  
    // See if we can get the length of the input string.
@@ -862,6 +863,27 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {
      return B.CreateSub(LHSV, RHSV, "chardiff");
    }
  
+  // memcmp(S1,S2,N/8)==0 -> (*(intN_t*)S1 != *(intN_t*)S2)==0
+  if (DL.isLegalInteger(Len * 8) && isOnlyUsedInZeroEqualityComparison(CI)) {
+
+    IntegerType *IntType = IntegerType::get(CI->getContext(), Len * 8);
+    unsigned PrefAlignment = DL.getPrefTypeAlignment(IntType);
+
+    if (getKnownAlignment(LHS, DL, CI) >= PrefAlignment &&
+        getKnownAlignment(RHS, DL, CI) >= PrefAlignment) {
+
+      Type *LHSPtrTy =
+          IntType->getPointerTo(LHS->getType()->getPointerAddressSpace());
+      Type *RHSPtrTy =
+          IntType->getPointerTo(RHS->getType()->getPointerAddressSpace());
+
+      Value *LHSV = B.CreateLoad(B.CreateBitCast(LHS, LHSPtrTy, "lhsc"), "lhsv");
+      Value *RHSV = B.CreateLoad(B.CreateBitCast(RHS, RHSPtrTy, "rhsc"), "rhsv");
+
+      return B.CreateZExt(B.CreateICmpNE(LHSV, RHSV), CI->getType(), "memcmp");
+    }
+  }
+
    // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
    StringRef LHSStr, RHSStr;
    if (getConstantStringInfo(LHS, LHSStr) &&
@@ -973,7 +995,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
    // floor((double)floatval) -> (double)floorf(floatval)
    if (Callee->isIntrinsic()) {
      Module *M = CI->getParent()->getParent()->getParent();
-    Intrinsic::ID IID = (Intrinsic::ID) Callee->getIntrinsicID();
+    Intrinsic::ID IID = Callee->getIntrinsicID();
      Function *F = Intrinsic::getDeclaration(M, IID, B.getFloatTy());
      V = B.CreateCall(F, V);
    } else {
@@ -1150,7 +1172,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilder<> &B) {
        Value *Callee =
            M->getOrInsertFunction(TLI->getName(LdExp), Op->getType(),
                                   Op->getType(), B.getInt32Ty(), nullptr);
-      CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+      CallInst *CI = B.CreateCall(Callee, {One, LdExpArg});
        if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
          CI->setCallingConv(F->getCallingConv());
  
@@ -1184,6 +1206,59 @@ Value *LibCallSimplifier::optimizeFabs(CallInst *CI, IRBuilder<> &B) {
    return Ret;
  }
  
+Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
+  // Lower fmin/fmax to fcmp+select under relaxed FP rules. If the call can be
+  // shrunk to a float function rather than a double function, do that first.
+  Function *Callee = CI->getCalledFunction();
+  if ((Callee->getName() == "fmin" && TLI->has(LibFunc::fminf)) ||
+      (Callee->getName() == "fmax" && TLI->has(LibFunc::fmaxf))) {
+    Value *Ret = optimizeBinaryDoubleFP(CI, B);
+    if (Ret)
+      return Ret;
+  }
+
+  // Make sure this has 2 arguments of FP type which match the result type.
+  FunctionType *FT = Callee->getFunctionType();
+  if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+      FT->getParamType(0) != FT->getParamType(1) ||
+      !FT->getParamType(0)->isFloatingPointTy())
+    return nullptr;
+
+  // FIXME: For finer-grain optimization, we need intrinsics to have the same
+  // fast-math flag decorations that are applied to FP instructions. For now,
+  // we have to rely on the function-level attributes to do this optimization
+  // because there's no other way to express that the calls can be relaxed.
+  IRBuilder<>::FastMathFlagGuard Guard(B);
+  FastMathFlags FMF;
+  Function *F = CI->getParent()->getParent();
+  Attribute Attr = F->getFnAttribute("unsafe-fp-math");
+  if (Attr.getValueAsString() == "true") {
+    // Unsafe algebra sets all fast-math-flags to true.
+    FMF.setUnsafeAlgebra();
+  } else {
+    // Otherwise no-nans-fp-math must at least be "true", or we give up.
+    Attr = F->getFnAttribute("no-nans-fp-math");
+    if (Attr.getValueAsString() != "true")
+      return nullptr;
+    // No-signed-zeros is implied by the definitions of fmax/fmin themselves:
+    // "Ideally, fmax would be sensitive to the sign of zero, for example
+    // fmax(-0.0, +0.0) would return +0; however, implementation in software
+    // might be impractical."
+    FMF.setNoSignedZeros();
+    FMF.setNoNaNs();
+  }
+  B.SetFastMathFlags(FMF);
+
+  // We have a relaxed floating-point environment. We can ignore NaN-handling
+  // and transform to a compare and select. We do not have to consider errno or
+  // exceptions, because fmin/fmax do not have those.
+  Value *Op0 = CI->getArgOperand(0);
+  Value *Op1 = CI->getArgOperand(1);
+  Value *Cmp = Callee->getName().startswith("fmin") ?
+    B.CreateFCmpOLT(Op0, Op1) : B.CreateFCmpOGT(Op0, Op1);
+  return B.CreateSelect(Cmp, Op0, Op1);
+}
+
  Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
    Function *Callee = CI->getCalledFunction();
    
@@ -1238,8 +1313,7 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
          // and multiply.
          // FIXME: We're not checking the sqrt because it doesn't have
          // fast-math-flags (see earlier comment).
-        IRBuilder<true, ConstantFolder,
-          IRBuilderDefaultInserter<true> >::FastMathFlagGuard Guard(B);
+        IRBuilder<>::FastMathFlagGuard Guard(B);
          B.SetFastMathFlags(I->getFastMathFlags());
          // If we found a repeated factor, hoist it out of the square root and
          // replace it with the fabs of that factor.
@@ -1436,7 +1510,7 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
    Type *ArgType = Op->getType();
    Value *F =
        Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType);
-  Value *V = B.CreateCall2(F, Op, B.getFalse(), "cttz");
+  Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
    V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
    V = B.CreateIntCast(V, B.getInt32Ty(), false);
  
@@ -2110,11 +2184,16 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
          return optimizeUnaryDoubleFP(CI, Builder, true);
        return nullptr;
      case LibFunc::copysign:
-    case LibFunc::fmin:
-    case LibFunc::fmax:
        if (hasFloatVersion(FuncName))
          return optimizeBinaryDoubleFP(CI, Builder);
        return nullptr;
+    case LibFunc::fminf:
+    case LibFunc::fmin:
+    case LibFunc::fminl:
+    case LibFunc::fmaxf:
+    case LibFunc::fmax:
+    case LibFunc::fmaxl:
+      return optimizeFMinFMax(CI, Builder);
      default:
        return nullptr;
      }
@@ -2145,7 +2224,7 @@ void LibCallSimplifier::replaceAllUsesWith(Instruction *I, Value *With) {
  // cbrt:
  //   * cbrt(expN(X))  -> expN(x/3)
  //   * cbrt(sqrt(x))  -> pow(x,1/6)
-//   * cbrt(sqrt(x))  -> pow(x,1/9)
+//   * cbrt(cbrt(x))  -> pow(x,1/9)
  //
  // exp, expf, expl:
  //   * exp(log(x))  -> x
@@ -2276,7 +2355,7 @@ Value *FortifiedLibCallSimplifier::optimizeStrpCpyChk(CallInst *CI,
    // __stpcpy_chk(x,x,...)  -> x+strlen(x)
    if (Func == LibFunc::stpcpy_chk && !OnlyLowerUnknownSize && Dst == Src) {
      Value *StrLen = EmitStrLen(Src, B, DL, TLI);
-    return StrLen ? B.CreateInBoundsGEP(Dst, StrLen) : nullptr;
+    return StrLen ? B.CreateInBoundsGEP(B.getInt8Ty(), Dst, StrLen) : nullptr;
    }
  
    // If a) we don't have any length information, or b) we know this will