Improve the X86 cost model for loads and stores.

author Nadav Rotem <nrotem@apple.com>

Fri, 21 Dec 2012 01:33:59 +0000 (01:33 +0000)

committer Nadav Rotem <nrotem@apple.com>

Fri, 21 Dec 2012 01:33:59 +0000 (01:33 +0000)
author Nadav Rotem <nrotem@apple.com>
Fri, 21 Dec 2012 01:33:59 +0000 (01:33 +0000)
committer Nadav Rotem <nrotem@apple.com>
Fri, 21 Dec 2012 01:33:59 +0000 (01:33 +0000)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp

index 1e64741c257797fccc0d9d2745f7a78c6cce0e31..6b650726b622c158bf09dcc4c561298c88201f7a 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -17818,6 +17818,30 @@ X86VectorTargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
    return VectorTargetTransformImpl::getArithmeticInstrCost(Opcode, Ty);
  }
  
+
+/// Estimate the cost of a load or store of \p Src from its legalization
+/// cost. \p Alignment and \p AddressSpace are not consulted here.
+unsigned
+X86VectorTargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
+                                              unsigned Alignment,
+                                              unsigned AddressSpace) const {
+  // Legalize the type.
+  std::pair<unsigned, MVT> LT = getTypeLegalizationCost(Src);
+  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+         "Invalid Opcode");
+
+  const X86Subtarget &ST =
+    TLI->getTargetMachine().getSubtarget<X86Subtarget>();
+
+  // Each load/store unit costs 1.
+  unsigned Cost = LT.first;
+
+  // On Sandybridge 256bit load/stores are double pumped (but not on Haswell).
+  if (LT.second.getSizeInBits() > 128 && !ST.hasAVX2())
+    Cost *= 2;
+  return Cost;
+}
+
  unsigned
  X86VectorTargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                                   unsigned Index) const {
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h

index 5be7f095a4707d9e048599109730b57e479dd090..72cd3b3f5bc139fdbbd4d2c5bc1abfd9afe85019 100644 (file)
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -953,6 +953,10 @@ namespace llvm {
  
      virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
  
+    virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src,
+                                     unsigned Alignment,
+                                     unsigned AddressSpace) const;
+
      virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                                          unsigned Index) const;
  
diff --git a/test/Analysis/CostModel/X86/load_store.ll b/test/Analysis/CostModel/X86/load_store.ll

new file mode 100644 (file)

index 0000000..4195b1d
--- /dev/null
+++ b/test/Analysis/CostModel/X86/load_store.ll
@@ -0,0 +1,64 @@
+; RUN: opt < %s  -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @stores(i32 %arg) {
+  ; Scalar integer stores: i8..i64 are expected to cost 1, i128 costs 2.
+  ;CHECK: cost of 1 {{.*}} store
+  store i8 undef, i8* undef, align 4
+  ;CHECK: cost of 1 {{.*}} store
+  store i16 undef, i16* undef, align 4
+  ;CHECK: cost of 1 {{.*}} store
+  store i32 undef, i32* undef, align 4
+  ;CHECK: cost of 1 {{.*}} store
+  store i64 undef, i64* undef, align 4
+  ;CHECK: cost of 2 {{.*}} store
+  store i128 undef, i128* undef, align 4
+  ; 4-element vector stores: only the 256-bit <4 x i64> is expected to cost 2.
+  ;CHECK: cost of 1 {{.*}} store
+  store <4 x i16> undef, <4 x i16>* undef, align 4
+  ;CHECK: cost of 1 {{.*}} store
+  store <4 x i32> undef, <4 x i32>* undef, align 4
+  ;CHECK: cost of 2 {{.*}} store
+  store <4 x i64> undef, <4 x i64>* undef, align 4
+  ; 8-element vector stores: expected cost is 1, 2, 4 for 128, 256, 512 bits.
+  ;CHECK: cost of 1 {{.*}} store
+  store <8 x i16> undef, <8 x i16>* undef, align 4
+  ;CHECK: cost of 2 {{.*}} store
+  store <8 x i32> undef, <8 x i32>* undef, align 4
+  ;CHECK: cost of 4 {{.*}} store
+  store <8 x i64> undef, <8 x i64>* undef, align 4
+
+  ret i32 undef
+}
+define i32 @loads(i32 %arg) {
+  ; Scalar integer loads: i8..i64 are expected to cost 1, i128 costs 2.
+  ;CHECK: cost of 1 {{.*}} load
+  load i8* undef, align 4
+  ;CHECK: cost of 1 {{.*}} load
+  load i16* undef, align 4
+  ;CHECK: cost of 1 {{.*}} load
+  load i32* undef, align 4
+  ;CHECK: cost of 1 {{.*}} load
+  load i64* undef, align 4
+  ;CHECK: cost of 2 {{.*}} load
+  load i128* undef, align 4
+  ; i32-element vector loads: only the 256-bit <8 x i32> is expected to cost 2.
+  ;CHECK: cost of 1 {{.*}} load
+  load <2 x i32>* undef, align 4
+  ;CHECK: cost of 1 {{.*}} load
+  load <4 x i32>* undef, align 4
+  ;CHECK: cost of 2 {{.*}} load
+  load <8 x i32>* undef, align 4
+  ; i64-element vector loads: expected cost is 1, 2, 4 for 128, 256, 512 bits.
+  ;CHECK: cost of 1 {{.*}} load
+  load <2 x i64>* undef, align 4
+  ;CHECK: cost of 2 {{.*}} load
+  load <4 x i64>* undef, align 4
+  ;CHECK: cost of 4 {{.*}} load
+  load <8 x i64>* undef, align 4
+
+  ret i32 undef
+}
+
diff --git a/test/Analysis/CostModel/X86/vectorized-loop.ll b/test/Analysis/CostModel/X86/vectorized-loop.ll

index 6c9e111bb10f7b1a336b900d2e394a12cd49febb..25b11145c661ed5b779101a0a30a27fe15771c02 100644 (file)
--- a/test/Analysis/CostModel/X86/vectorized-loop.ll
+++ b/test/Analysis/CostModel/X86/vectorized-loop.ll
@@ -28,16 +28,17 @@ vector.body:                                      ; preds = %for.body.lr.ph, %ve
    %4 = getelementptr inbounds i32* %B, i64 %3
    ;CHECK: cost of 0 {{.*}} bitcast
    %5 = bitcast i32* %4 to <8 x i32>*
-  ;CHECK: cost of 1 {{.*}} load
+  ;CHECK: cost of 2 {{.*}} load
    %6 = load <8 x i32>* %5, align 4
    ;CHECK: cost of 4 {{.*}} mul
    %7 = mul nsw <8 x i32> %6, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
    %8 = getelementptr inbounds i32* %A, i64 %index
    %9 = bitcast i32* %8 to <8 x i32>*
+  ;CHECK: cost of 2 {{.*}} load
    %10 = load <8 x i32>* %9, align 4
    ;CHECK: cost of 4 {{.*}} add
    %11 = add nsw <8 x i32> %10, %7
-  ;CHECK: cost of 1 {{.*}} store
+  ;CHECK: cost of 2 {{.*}} store
    store <8 x i32> %11, <8 x i32>* %9, align 4
    %index.next = add i64 %index, 8
    %12 = icmp eq i64 %index.next, %end.idx.rnd.down
author	Nadav Rotem <nrotem@apple.com>
	Fri, 21 Dec 2012 01:33:59 +0000 (01:33 +0000)
committer	Nadav Rotem <nrotem@apple.com>
	Fri, 21 Dec 2012 01:33:59 +0000 (01:33 +0000)
lib/Target/X86/X86ISelLowering.cpp		patch \| blob \| history
lib/Target/X86/X86ISelLowering.h		patch \| blob \| history
test/Analysis/CostModel/X86/load_store.ll	[new file with mode: 0644]	patch \| blob
test/Analysis/CostModel/X86/vectorized-loop.ll		patch \| blob \| history