CostModel: increase the default cost of supported floating point operations from 1 to 2.

author Nadav Rotem <nrotem@apple.com>

Fri, 12 Apr 2013 21:15:03 +0000 (21:15 +0000)

committer Nadav Rotem <nrotem@apple.com>

Fri, 12 Apr 2013 21:15:03 +0000 (21:15 +0000)
author Nadav Rotem <nrotem@apple.com>
Fri, 12 Apr 2013 21:15:03 +0000 (21:15 +0000)
committer Nadav Rotem <nrotem@apple.com>
Fri, 12 Apr 2013 21:15:03 +0000 (21:15 +0000)
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp

index 012ff8ad8339787e0d2b4992999e1e87e49bf978..499571fcac64f704a2894535cbb6f4bc7d5ff067 100644 (file)
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -204,20 +204,23 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
  
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
  
+  bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
+  unsigned OpCost = (IsFloat ? 2 : 1);
+
    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      // If the type is split to multiple registers, assume that thre is some
      // overhead to this.
      // TODO: Once we have extract/insert subvector cost we need to use them.
      if (LT.first > 1)
-      return LT.first * 2;
-    return LT.first * 1;
+      return LT.first * 2 * OpCost;
+    return LT.first * 1 * OpCost;
    }
  
    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered then assume
      // thare the code is twice as expensive.
-    return LT.first * 2;
+    return LT.first * 2 * OpCost;
    }
  
    // Else, assume that we need to scalarize this op.
@@ -230,7 +233,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
    }
  
    // We don't know anything about this scalar instruction.
-  return 1;
+  return OpCost;
  }
  
  unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
diff --git a/test/Analysis/CostModel/X86/arith.ll b/test/Analysis/CostModel/X86/arith.ll

index 85b442533f412b758198ac9b81ec5843ef0513be..92f5a1ec3a00a076407da48b928e318d3e795dbb 100644 (file)
--- a/test/Analysis/CostModel/X86/arith.ll
+++ b/test/Analysis/CostModel/X86/arith.ll
@@ -66,9 +66,9 @@ define void @avx2mull() {
  
  ; CHECK: fmul
  define i32 @fmul(i32 %arg) {
-  ;CHECK: cost of 1 {{.*}} fmul
+  ;CHECK: cost of 2 {{.*}} fmul
    %A = fmul <4 x float> undef, undef
-  ;CHECK: cost of 1 {{.*}} fmul
+  ;CHECK: cost of 2 {{.*}} fmul
    %B = fmul <8 x float> undef, undef
    ret i32 undef
  }
diff --git a/test/Transforms/BBVectorize/X86/loop1.ll b/test/Transforms/BBVectorize/X86/loop1.ll

index 493f23b09853923f14fc1d43821e5517f2561f3d..bbf565d1cc7f0db3b4ab3a51c88f5060b9ee9ab5 100644 (file)
--- a/test/Transforms/BBVectorize/X86/loop1.ll
+++ b/test/Transforms/BBVectorize/X86/loop1.ll
@@ -34,7 +34,15 @@ for.body:                                         ; preds = %for.body, %entry
    %lftr.wideiv = trunc i64 %indvars.iv.next to i32
    %exitcond = icmp eq i32 %lftr.wideiv, 10
    br i1 %exitcond, label %for.end, label %for.body
-; CHECK-NOT: <2 x double>
+; CHECK: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: fadd <2 x double>
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: fadd <2 x double>
+; CHECK-NEXT: insertelement
+; CHECK-NEXT: fmul <2 x double>
+
  ; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
  ; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
  ; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
diff --git a/test/Transforms/BBVectorize/X86/simple.ll b/test/Transforms/BBVectorize/X86/simple.ll

index 0113e38bb1c91f9111f53156dcfe71770ac86091..8abfa5f8bd29a5e329679a2f86101e00b8d03fc8 100644 (file)
--- a/test/Transforms/BBVectorize/X86/simple.ll
+++ b/test/Transforms/BBVectorize/X86/simple.ll
@@ -12,7 +12,11 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) {
         %R  = fmul double %Z1, %Z2
         ret double %R
  ; CHECK: @test1
-; CHECK-NOT: fmul <2 x double>
+; CHECK: fsub <2 x double>
+; CHECK: fmul <2 x double>
+; CHECK: fadd <2 x double>
+; CHECK: extract
+; CHECK: extract
  ; CHECK: ret double %R
  }
  
@@ -63,7 +67,12 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) {
         %R  = fmul double %Z1, %Z2
         ret double %R
  ; CHECK: @test2
-; CHECK-NOT: fmul <2 x double>
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: fsub <2 x double>
+; CHECK: fmul <2 x double>
  ; CHECK: ret double %R
  }
  
@@ -80,7 +89,15 @@ define double @test4(double %A1, double %A2, double %B1, double %B2) {
         %R  = fmul double %Z1, %Z2
         ret double %R
  ; CHECK: @test4
-; CHECK-NOT: fmul <2 x double>
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: fsub <2 x double>
+; CHECK: fmul <2 x double>
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: fadd <2 x double>
  ; CHECK: ret double %R
  }
author	Nadav Rotem <nrotem@apple.com>
	Fri, 12 Apr 2013 21:15:03 +0000 (21:15 +0000)
committer	Nadav Rotem <nrotem@apple.com>
	Fri, 12 Apr 2013 21:15:03 +0000 (21:15 +0000)
lib/CodeGen/BasicTargetTransformInfo.cpp		patch \| blob \| history
test/Analysis/CostModel/X86/arith.ll		patch \| blob \| history
test/Transforms/BBVectorize/X86/loop1.ll		patch \| blob \| history
test/Transforms/BBVectorize/X86/simple.ll		patch \| blob \| history