Value soft float calls as more expensive in the inliner.

author Cameron Esfahani <dirty@apple.com>

Thu, 5 Feb 2015 02:09:33 +0000 (02:09 +0000)

committer Cameron Esfahani <dirty@apple.com>

Thu, 5 Feb 2015 02:09:33 +0000 (02:09 +0000)
author Cameron Esfahani <dirty@apple.com>
Thu, 5 Feb 2015 02:09:33 +0000 (02:09 +0000)
committer Cameron Esfahani <dirty@apple.com>
Thu, 5 Feb 2015 02:09:33 +0000 (02:09 +0000)
diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h

index 53da46019b39a2b2b256de580f72ebb14fd37c76..1c3bcdbdf7483a2397df72838d66d94d1e35a411 100644 (file)
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -325,6 +325,10 @@ public:
    /// \brief Return true if the hardware has a fast square-root instruction.
    bool haveFastSqrt(Type *Ty) const;
  
+  /// \brief Return the expected cost of floating point operations on the
+  /// specified type, e.g. TCC_Expensive when they are likely to be libcalls.
+  unsigned getFPOpCost(Type *Ty) const;
+
    /// \brief Return the expected cost of materializing for the given integer
    /// immediate of the specified type.
    unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
@@ -516,6 +520,7 @@ public:
    virtual bool shouldBuildLookupTables() = 0;
    virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
    virtual bool haveFastSqrt(Type *Ty) = 0;
+  virtual unsigned getFPOpCost(Type *Ty) = 0;
    virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) = 0;
    virtual unsigned getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                                   Type *Ty) = 0;
@@ -631,6 +636,11 @@ public:
      return Impl.getPopcntSupport(IntTyWidthInBit);
    }
    bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
+
+  unsigned getFPOpCost(Type *Ty) override {
+    return Impl.getFPOpCost(Ty);
+  }
+
    unsigned getIntImmCost(const APInt &Imm, Type *Ty) override {
      return Impl.getIntImmCost(Imm, Ty);
    }
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h

index 488f96e6ade55d089cebc81953322403540527cf..b90f6888238ef2cde99a25bc48ff7e46279e8c16 100644 (file)
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -239,6 +239,8 @@ public:
  
    bool haveFastSqrt(Type *Ty) { return false; }
  
+  unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
+
    unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
  
    unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h

index 61664327244faabe1b43240e11553e6ee9ac1602..53cdcac7258ff3ba1c0c65bdb8fc519a7cc42a89 100644 (file)
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -167,6 +167,12 @@ public:
             TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
    }
  
+  unsigned getFPOpCost(Type *Ty) {
+    // By default, assume FP operations are implemented in hardware and so cost
+    // no more than a basic instruction. Target-specific TTI can override this.
+    return TargetTransformInfo::TCC_Basic;
+  }
+
    void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) {
      // This unrolling functionality is target independent, but to provide some
      // motivation for its intended use, for x86:
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp

index 166488bf67ea5e02ca5cb5da3e54020d80c7543e..c180f36b9234264d8eba0e42999c25884585a9cf 100644 (file)
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -907,6 +907,25 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
      if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
        ++NumVectorInstructions;
  
+    // If the instruction is floating point, and the target says this operation is
+    // expensive or the function has the "use-soft-float" attribute, this may
+    // eventually become a library call.  Treat the cost as such.
+    if (I->getType()->isFloatingPointTy()) {
+      bool hasSoftFloatAttr = false;
+
+      // If the function has the "use-soft-float" attribute, mark it as expensive.
+      if (F.hasFnAttribute("use-soft-float")) {
+        Attribute Attr = F.getFnAttribute("use-soft-float");
+        StringRef Val = Attr.getValueAsString();
+        if (Val == "true")
+          hasSoftFloatAttr = true;
+      }
+
+      if (TTI.getFPOpCost(I->getType()) == TargetTransformInfo::TCC_Expensive ||
+          hasSoftFloatAttr)
+        Cost += InlineConstants::CallPenalty;
+    }
+
      // If the instruction simplified to a constant, there is no cost to this
      // instruction. Visit the instructions using our InstVisitor to account for
      // all of the per-instruction logic. The visit tree returns true if we
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp

index 5a50d36ee8ba3f00b6cc3c75deba8eaa91443f11..b5440e2a2c3711d56a91c987c0175ba2ed742360 100644 (file)
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -148,6 +148,10 @@ bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
    return TTIImpl->haveFastSqrt(Ty);
  }
  
+unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const {
+  return TTIImpl->getFPOpCost(Ty);
+}
+
  unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
    return TTIImpl->getIntImmCost(Imm, Ty);
  }
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h

index dd1ab3aabcfb9f43645f5523e56c9b7ecab14af1..64d499a4dc5dcfe3267f6599ea60fc25e48f07f2 100644 (file)
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -310,7 +310,8 @@ public:
    bool hasCRC() const { return HasCRC; }
    bool hasVirtualization() const { return HasVirtualization; }
    bool useNEONForSinglePrecisionFP() const {
-    return hasNEON() && UseNEONForSinglePrecisionFP; }
+    return hasNEON() && UseNEONForSinglePrecisionFP;
+  }
  
    bool hasDivide() const { return HasHardwareDivide; }
    bool hasDivideInARMMode() const { return HasHardwareDivideInARM; }
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp

index 1cb1efb192462f1666d73ca6bfbd3b8be311b11a..4e1b371640bcdee2a260e93679326d0ef3917d95 100644 (file)
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -314,6 +314,25 @@ unsigned ARMTTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
    return 1;
  }
  
+unsigned ARMTTIImpl::getFPOpCost(Type *Ty) {
+  // Use logic similar to that in ARMISelLowering: any ARM CPU with VFP2 has
+  // hardware floating point, except Thumb1-only targets, which cannot access
+  // VFP.  Every other case falls through and is reported as expensive.
+
+  if (ST->hasVFP2() && !ST->isThumb1Only()) {
+    if (Ty->isFloatTy()) {
+      return TargetTransformInfo::TCC_Basic;
+    }
+
+    if (Ty->isDoubleTy()) {
+      return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive :
+        TargetTransformInfo::TCC_Basic;
+    }
+  }
+
+  return TargetTransformInfo::TCC_Expensive;
+}
+
  unsigned ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                                      Type *SubTp) {
    // We only handle costs of reverse and alternate shuffles for now.
diff --git a/lib/Target/ARM/ARMTargetTransformInfo.h b/lib/Target/ARM/ARMTargetTransformInfo.h

index 6167fae9c8aafac68b6db429acccdb293757a8f8..97590f60893aff5b85c40a67223b7deed3d65e8e 100644 (file)
--- a/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -114,6 +114,8 @@ public:
  
    unsigned getAddressComputationCost(Type *Val, bool IsComplex);
  
+  unsigned getFPOpCost(Type *Ty);
+
    unsigned getArithmeticInstrCost(
        unsigned Opcode, Type *Ty,
        TTI::OperandValueKind Op1Info = TTI::OK_AnyValue,
diff --git a/test/Transforms/Inline/inline-fp.ll b/test/Transforms/Inline/inline-fp.ll

new file mode 100644 (file)

index 0000000..4d18ce8
--- /dev/null
+++ b/test/Transforms/Inline/inline-fp.ll
@@ -0,0 +1,136 @@
+; RUN: opt -S -inline < %s | FileCheck %s
+; Make sure that the inliner treats soft float implementations as being more
+; expensive.
+
+define i32 @test_nofp() #0 {
+; f_nofp() has the "use-soft-float" attribute, so it should never get inlined.
+; CHECK-LABEL: test_nofp
+; CHECK: call float @f_nofp 
+entry:
+  %responseX = alloca i32, align 4
+  %responseY = alloca i32, align 4
+  %responseZ = alloca i32, align 4
+  %valueX = alloca i8, align 1
+  %valueY = alloca i8, align 1
+  %valueZ = alloca i8, align 1
+
+  call void @getX(i32* %responseX, i8* %valueX)
+  call void @getY(i32* %responseY, i8* %valueY)
+  call void @getZ(i32* %responseZ, i8* %valueZ)
+
+  %0 = load i32* %responseX
+  %1 = load i8* %valueX
+  %call = call float @f_nofp(i32 %0, i8 zeroext %1)
+  %2 = load i32* %responseZ
+  %3 = load i8* %valueZ
+  %call2 = call float @f_nofp(i32 %2, i8 zeroext %3)
+  %call3 = call float @fabsf(float %call)
+  %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
+  br i1 %cmp, label %if.end12, label %if.else
+
+if.else:                                          ; preds = %entry
+  %4 = load i32* %responseY
+  %5 = load i8* %valueY
+  %call1 = call float @f_nofp(i32 %4, i8 zeroext %5)
+  %call4 = call float @fabsf(float %call1)
+  %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
+  br i1 %cmp5, label %if.end12, label %if.else7
+
+if.else7:                                         ; preds = %if.else
+  %call8 = call float @fabsf(float %call2)
+  %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
+  br i1 %cmp9, label %if.then10, label %if.end12
+
+if.then10:                                        ; preds = %if.else7
+  br label %if.end12
+
+if.end12:                                         ; preds = %if.else, %entry, %if.then10, %if.else7
+  %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
+  ret i32 %success.0
+}
+
+define i32 @test_hasfp() #0 {
+; f_hasfp() does not have the "use-soft-float" attribute, so it should get inlined.
+; CHECK-LABEL: test_hasfp
+; CHECK-NOT: call float @f_hasfp 
+entry:
+  %responseX = alloca i32, align 4
+  %responseY = alloca i32, align 4
+  %responseZ = alloca i32, align 4
+  %valueX = alloca i8, align 1
+  %valueY = alloca i8, align 1
+  %valueZ = alloca i8, align 1
+
+  call void @getX(i32* %responseX, i8* %valueX)
+  call void @getY(i32* %responseY, i8* %valueY)
+  call void @getZ(i32* %responseZ, i8* %valueZ)
+
+  %0 = load i32* %responseX
+  %1 = load i8* %valueX
+  %call = call float @f_hasfp(i32 %0, i8 zeroext %1)
+  %2 = load i32* %responseZ
+  %3 = load i8* %valueZ
+  %call2 = call float @f_hasfp(i32 %2, i8 zeroext %3)
+  %call3 = call float @fabsf(float %call)
+  %cmp = fcmp ogt float %call3, 0x3FC1EB8520000000
+  br i1 %cmp, label %if.end12, label %if.else
+
+if.else:                                          ; preds = %entry
+  %4 = load i32* %responseY
+  %5 = load i8* %valueY
+  %call1 = call float @f_hasfp(i32 %4, i8 zeroext %5)
+  %call4 = call float @fabsf(float %call1)
+  %cmp5 = fcmp ogt float %call4, 0x3FC1EB8520000000
+  br i1 %cmp5, label %if.end12, label %if.else7
+
+if.else7:                                         ; preds = %if.else
+  %call8 = call float @fabsf(float %call2)
+  %cmp9 = fcmp ogt float %call8, 0x3FC1EB8520000000
+  br i1 %cmp9, label %if.then10, label %if.end12
+
+if.then10:                                        ; preds = %if.else7
+  br label %if.end12
+
+if.end12:                                         ; preds = %if.else, %entry, %if.then10, %if.else7
+  %success.0 = phi i32 [ 0, %if.then10 ], [ 1, %if.else7 ], [ 0, %entry ], [ 0, %if.else ]
+  ret i32 %success.0
+}
+
+declare void @getX(i32*, i8*) #0
+
+declare void @getY(i32*, i8*) #0
+
+declare void @getZ(i32*, i8*) #0
+
+define internal float @f_hasfp(i32 %response, i8 zeroext %value1) #0 {
+entry:
+  %conv = zext i8 %value1 to i32
+  %sub = add nsw i32 %conv, -1
+  %conv1 = sitofp i32 %sub to float
+  %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+  %mul = fmul float %0, 2.620000e+03
+  %conv2 = sitofp i32 %response to float
+  %sub3 = fsub float %conv2, %mul
+  %div = fdiv float %sub3, %mul
+  ret float %div
+}
+
+define internal float @f_nofp(i32 %response, i8 zeroext %value1) #1 {
+entry:
+  %conv = zext i8 %value1 to i32
+  %sub = add nsw i32 %conv, -1
+  %conv1 = sitofp i32 %sub to float
+  %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1)
+  %mul = fmul float %0, 2.620000e+03
+  %conv2 = sitofp i32 %response to float
+  %sub3 = fsub float %conv2, %mul
+  %div = fdiv float %sub3, %mul
+  ret float %div
+}
+
+declare float @fabsf(float) optsize minsize
+
+declare float @llvm.pow.f32(float, float) optsize minsize
+
+attributes #0 = { minsize optsize }
+attributes #1 = { minsize optsize "use-soft-float"="true" }
author	Cameron Esfahani <dirty@apple.com>
	Thu, 5 Feb 2015 02:09:33 +0000 (02:09 +0000)
committer	Cameron Esfahani <dirty@apple.com>
	Thu, 5 Feb 2015 02:09:33 +0000 (02:09 +0000)
include/llvm/Analysis/TargetTransformInfo.h		patch \| blob \| history
include/llvm/Analysis/TargetTransformInfoImpl.h		patch \| blob \| history
include/llvm/CodeGen/BasicTTIImpl.h		patch \| blob \| history
lib/Analysis/IPA/InlineCost.cpp		patch \| blob \| history
lib/Analysis/TargetTransformInfo.cpp		patch \| blob \| history
lib/Target/ARM/ARMSubtarget.h		patch \| blob \| history
lib/Target/ARM/ARMTargetTransformInfo.cpp		patch \| blob \| history
lib/Target/ARM/ARMTargetTransformInfo.h		patch \| blob \| history
test/Transforms/Inline/inline-fp.ll	[new file with mode: 0644]	patch \| blob