Cost Model: Improve the accuracy of the zext/sext/trunc vector cost estimation.

author Nadav Rotem <nrotem@apple.com>

Mon, 5 Nov 2012 22:20:53 +0000 (22:20 +0000)

committer Nadav Rotem <nrotem@apple.com>

Mon, 5 Nov 2012 22:20:53 +0000 (22:20 +0000)
author Nadav Rotem <nrotem@apple.com>
Mon, 5 Nov 2012 22:20:53 +0000 (22:20 +0000)
committer Nadav Rotem <nrotem@apple.com>
Mon, 5 Nov 2012 22:20:53 +0000 (22:20 +0000)
diff --git a/lib/Target/TargetTransformImpl.cpp b/lib/Target/TargetTransformImpl.cpp

index 4b427a2b6d4426f19f2eb0392ff599d6b1f0c474..ca0dd9a54ed8b4cd74cd421dcafd9703a2ca2df5 100644 (file)
--- a/lib/Target/TargetTransformImpl.cpp
+++ b/lib/Target/TargetTransformImpl.cpp
@@ -101,7 +101,7 @@ int VectorTargetTransformImpl::InstructionOpcodeToISD(unsigned Opcode) const {
    case AtomicRMW:      return 0;
    case Trunc:          return ISD::TRUNCATE;
    case ZExt:           return ISD::ZERO_EXTEND;
-  case SExt:           return ISD::SEXTLOAD;
+  case SExt:           return ISD::SIGN_EXTEND;
    case FPToUI:         return ISD::FP_TO_UINT;
    case FPToSI:         return ISD::FP_TO_SINT;
    case UIToFP:         return ISD::UINT_TO_FP;
@@ -235,9 +235,17 @@ unsigned VectorTargetTransformImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
          SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
  
        // Bitcast between types that are legalized to the same type are free.
-      if (Opcode == Instruction::BitCast)
+      if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
          return 0;
  
+      // Assume that Zext is done using AND.
+      if (Opcode == Instruction::ZExt)
+        return 1;
+
+      // Assume that sext is done using SHL and SRA.
+      if (Opcode == Instruction::SExt)
+        return 2;
+
        // Just check the op cost. If the operation is legal then assume it costs
        // 1 and multiply by the type-legalization overhead.
        if (!TLI->isOperationExpand(ISD, DstLT.second))
@@ -310,7 +318,6 @@ unsigned VectorTargetTransformImpl::getCmpSelInstrCost(unsigned Opcode,
    return 1;
  }
  
-/// Returns the expected cost of Vector Insert and Extract.
  unsigned VectorTargetTransformImpl::getVectorInstrCost(unsigned Opcode,
                                                         Type *Val,
                                                         unsigned Index) const {
diff --git a/test/Analysis/CostModel/X86/cast.ll b/test/Analysis/CostModel/X86/cast.ll

new file mode 100644 (file)

index 0000000..f8b1114
--- /dev/null
+++ b/test/Analysis/CostModel/X86/cast.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @add(i32 %arg) {
+
+  ; -- Same size registers --
+  ;CHECK: cost of 1 {{.*}} zext
+  %A = zext <4 x i1> undef to <4 x i32>
+  ;CHECK: cost of 2 {{.*}} sext
+  %B = sext <4 x i1> undef to <4 x i32>
+  ;CHECK: cost of 0 {{.*}} trunc
+  %C = trunc <4 x i32> undef to <4 x i1>
+
+  ; -- Different size registers --
+  ;CHECK-NOT: cost of 1 {{.*}} zext
+  %D = zext <8 x i1> undef to <8 x i32>
+  ;CHECK-NOT: cost of 2 {{.*}} sext
+  %E = sext <8 x i1> undef to <8 x i32>
+  ;CHECK-NOT: cost of 2 {{.*}} trunc
+  %F = trunc <8 x i32> undef to <8 x i1>
+
+  ; -- scalars --
+
+  ;CHECK: cost of 1 {{.*}} zext
+  %G = zext i1 undef to i32
+  ;CHECK: cost of 0 {{.*}} trunc
+  %H = trunc i32 undef to i1
+
+  ;CHECK: cost of 1 {{.*}} ret
+  ret i32 undef
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll

index 19bcdc5d902741233a9f7f8f9fd8c0e5e5a390fe..8f1bb545fa019d3baac495714cb46ef238a7d543 100644 (file)
--- a/test/Transforms/LoopVectorize/X86/conversion-cost.ll
+++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -4,7 +4,7 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
  target triple = "x86_64-apple-macosx10.8.0"
  
  ;CHECK: @conversion_cost1
-;CHECK: store <8 x i8>
+;CHECK: store <2 x i8>
  ;CHECK: ret
  define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
    %1 = icmp sgt i32 %n, 3
@@ -25,7 +25,7 @@ define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) noun
  }
  
  ;CHECK: @conversion_cost2
-;CHECK-NOT: <8 x float>
+;CHECK: <2 x float>
  ;CHECK: ret
  define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
    %1 = icmp sgt i32 %n, 9
author	Nadav Rotem <nrotem@apple.com>
	Mon, 5 Nov 2012 22:20:53 +0000 (22:20 +0000)
committer	Nadav Rotem <nrotem@apple.com>
	Mon, 5 Nov 2012 22:20:53 +0000 (22:20 +0000)
lib/Target/TargetTransformImpl.cpp		patch \| blob \| history
test/Analysis/CostModel/X86/cast.ll	[new file with mode: 0644]	patch \| blob
test/Transforms/LoopVectorize/X86/conversion-cost.ll		patch \| blob \| history