[X86][SSE] Updated SHL/LSHR i64 vectorization costs.

author Simon Pilgrim <llvm-dev@redking.me.uk>

Sat, 18 Jul 2015 20:06:30 +0000 (20:06 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Sat, 18 Jul 2015 20:06:30 +0000 (20:06 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 18 Jul 2015 20:06:30 +0000 (20:06 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 18 Jul 2015 20:06:30 +0000 (20:06 +0000)
diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp

index 7df7260918439b3280239cce2f941a5f14639b50..7cda54d6b009c6779790160b24a674c6b50bea21 100644 (file)
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -259,13 +259,13 @@ unsigned X86TTIImpl::getArithmeticInstrCost(
      { ISD::SHL,  MVT::v16i8,    26 }, // cmpgtb sequence.
      { ISD::SHL,  MVT::v8i16,    32 }, // cmpgtb sequence.
      { ISD::SHL,  MVT::v4i32,   2*5 }, // We optimized this using mul.
-    { ISD::SHL,  MVT::v2i64,  2*10 }, // Scalarized.
-    { ISD::SHL,  MVT::v4i64,  4*10 }, // Scalarized.
+    { ISD::SHL,  MVT::v2i64,     4 }, // splat+shuffle sequence.
+    { ISD::SHL,  MVT::v4i64,     8 }, // splat+shuffle sequence.
  
      { ISD::SRL,  MVT::v16i8,    26 }, // cmpgtb sequence.
      { ISD::SRL,  MVT::v8i16,    32 }, // cmpgtb sequence.
      { ISD::SRL,  MVT::v4i32,    16 }, // Shift each lane + blend.
-    { ISD::SRL,  MVT::v2i64,  2*10 }, // Scalarized.
+    { ISD::SRL,  MVT::v2i64,     4 }, // splat+shuffle sequence.
  
      { ISD::SRA,  MVT::v16i8,    54 }, // unpacked cmpgtb sequence.
      { ISD::SRA,  MVT::v8i16,    32 }, // cmpgtb sequence.
diff --git a/test/Analysis/CostModel/X86/testshiftlshr.ll b/test/Analysis/CostModel/X86/testshiftlshr.ll

index 5775a42d08adb3af379650961a1f04ee5393b968..52f176fe4d63d0ec72286d8228bb9a78f119a050 100644 (file)
--- a/test/Analysis/CostModel/X86/testshiftlshr.ll
+++ b/test/Analysis/CostModel/X86/testshiftlshr.ll
@@ -5,7 +5,7 @@
  define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
  entry:
    ; SSE2: shift2i16
-  ; SSE2: cost of 20 {{.*}} lshr
+  ; SSE2: cost of 4 {{.*}} lshr
    ; SSE2-CODEGEN: shift2i16
    ; SSE2-CODEGEN: psrlq
  
@@ -65,7 +65,7 @@ entry:
  define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
  entry:
    ; SSE2: shift2i32
-  ; SSE2: cost of 20 {{.*}} lshr
+  ; SSE2: cost of 4 {{.*}} lshr
    ; SSE2-CODEGEN: shift2i32
    ; SSE2-CODEGEN: psrlq
  
@@ -125,7 +125,7 @@ entry:
  define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
  entry:
    ; SSE2: shift2i64
-  ; SSE2: cost of 20 {{.*}} lshr
+  ; SSE2: cost of 4 {{.*}} lshr
    ; SSE2-CODEGEN: shift2i64
    ; SSE2-CODEGEN: psrlq
  
@@ -137,7 +137,7 @@ entry:
  define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
  entry:
    ; SSE2: shift4i64
-  ; SSE2: cost of 40 {{.*}} lshr
+  ; SSE2: cost of 8 {{.*}} lshr
    ; SSE2-CODEGEN: shift4i64
    ; SSE2-CODEGEN: psrlq
  
@@ -149,7 +149,7 @@ entry:
  define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
  entry:
    ; SSE2: shift8i64
-  ; SSE2: cost of 80 {{.*}} lshr
+  ; SSE2: cost of 16 {{.*}} lshr
    ; SSE2-CODEGEN: shift8i64
    ; SSE2-CODEGEN: psrlq
  
@@ -161,7 +161,7 @@ entry:
  define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
  entry:
    ; SSE2: shift16i64
-  ; SSE2: cost of 160 {{.*}} lshr
+  ; SSE2: cost of 32 {{.*}} lshr
    ; SSE2-CODEGEN: shift16i64
    ; SSE2-CODEGEN: psrlq
  
@@ -173,7 +173,7 @@ entry:
  define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
  entry:
    ; SSE2: shift32i64
-  ; SSE2: cost of 320 {{.*}} lshr
+  ; SSE2: cost of 64 {{.*}} lshr
    ; SSE2-CODEGEN: shift32i64
    ; SSE2-CODEGEN: psrlq
  
@@ -185,7 +185,7 @@ entry:
  define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
  entry:
    ; SSE2: shift2i8
-  ; SSE2: cost of 20 {{.*}} lshr
+  ; SSE2: cost of 4 {{.*}} lshr
    ; SSE2-CODEGEN: shift2i8
    ; SSE2-CODEGEN: psrlq
  
diff --git a/test/Analysis/CostModel/X86/testshiftshl.ll b/test/Analysis/CostModel/X86/testshiftshl.ll

index d4e33818932bddcb862e177ab7a42836c1d44a7d..e385c5bfeeacf7d78d3b4f9d7e059e5e40d9d5ce 100644 (file)
--- a/test/Analysis/CostModel/X86/testshiftshl.ll
+++ b/test/Analysis/CostModel/X86/testshiftshl.ll
@@ -5,7 +5,7 @@
  define %shifttype @shift2i16(%shifttype %a, %shifttype %b) {
  entry:
    ; SSE2: shift2i16
-  ; SSE2: cost of 20 {{.*}} shl
+  ; SSE2: cost of 4 {{.*}} shl
    ; SSE2-CODEGEN: shift2i16
    ; SSE2-CODEGEN: psllq
  
@@ -65,7 +65,7 @@ entry:
  define %shifttype2i32 @shift2i32(%shifttype2i32 %a, %shifttype2i32 %b) {
  entry:
    ; SSE2: shift2i32
-  ; SSE2: cost of 20 {{.*}} shl
+  ; SSE2: cost of 4 {{.*}} shl
    ; SSE2-CODEGEN: shift2i32
    ; SSE2-CODEGEN: psllq
  
@@ -125,7 +125,7 @@ entry:
  define %shifttype2i64 @shift2i64(%shifttype2i64 %a, %shifttype2i64 %b) {
  entry:
    ; SSE2: shift2i64
-  ; SSE2: cost of 20 {{.*}} shl
+  ; SSE2: cost of 4 {{.*}} shl
    ; SSE2-CODEGEN: shift2i64
    ; SSE2-CODEGEN: psllq
  
@@ -137,7 +137,7 @@ entry:
  define %shifttype4i64 @shift4i64(%shifttype4i64 %a, %shifttype4i64 %b) {
  entry:
    ; SSE2: shift4i64
-  ; SSE2: cost of 40 {{.*}} shl
+  ; SSE2: cost of 8 {{.*}} shl
    ; SSE2-CODEGEN: shift4i64
    ; SSE2-CODEGEN: psllq
  
@@ -149,7 +149,7 @@ entry:
  define %shifttype8i64 @shift8i64(%shifttype8i64 %a, %shifttype8i64 %b) {
  entry:
    ; SSE2: shift8i64
-  ; SSE2: cost of 80 {{.*}} shl
+  ; SSE2: cost of 16 {{.*}} shl
    ; SSE2-CODEGEN: shift8i64
    ; SSE2-CODEGEN: psllq
  
@@ -161,7 +161,7 @@ entry:
  define %shifttype16i64 @shift16i64(%shifttype16i64 %a, %shifttype16i64 %b) {
  entry:
    ; SSE2: shift16i64
-  ; SSE2: cost of 160 {{.*}} shl
+  ; SSE2: cost of 32 {{.*}} shl
    ; SSE2-CODEGEN: shift16i64
    ; SSE2-CODEGEN: psllq
  
@@ -173,7 +173,7 @@ entry:
  define %shifttype32i64 @shift32i64(%shifttype32i64 %a, %shifttype32i64 %b) {
  entry:
    ; SSE2: shift32i64
-  ; SSE2: cost of 320 {{.*}} shl
+  ; SSE2: cost of 64 {{.*}} shl
    ; SSE2-CODEGEN: shift32i64
    ; SSE2-CODEGEN: psllq
  
@@ -185,7 +185,7 @@ entry:
  define %shifttype2i8 @shift2i8(%shifttype2i8 %a, %shifttype2i8 %b) {
  entry:
    ; SSE2: shift2i8
-  ; SSE2: cost of 20 {{.*}} shl
+  ; SSE2: cost of 4 {{.*}} shl
    ; SSE2-CODEGEN: shift2i8
    ; SSE2-CODEGEN: psllq
  
diff --git a/test/Analysis/CostModel/X86/vshift-cost.ll b/test/Analysis/CostModel/X86/vshift-cost.ll

index 84d72463ac0dd75b6c3debab0b88118d774303aa..dd93badc893fed55ff276fc7815f58ca497da228 100644 (file)
--- a/test/Analysis/CostModel/X86/vshift-cost.ll
+++ b/test/Analysis/CostModel/X86/vshift-cost.ll
@@ -62,9 +62,9 @@ define <2 x i64> @test5(<2 x i64> %a) {
    ret <2 x i64> %shl
  }
  ; CHECK: 'Cost Model Analysis' for function 'test5':
-; SSE2: Found an estimated cost of 20 for instruction:   %shl
-; SSE41: Found an estimated cost of 20 for instruction:   %shl
-; AVX: Found an estimated cost of 20 for instruction:   %shl
+; SSE2: Found an estimated cost of 4 for instruction:   %shl
+; SSE41: Found an estimated cost of 4 for instruction:   %shl
+; AVX: Found an estimated cost of 4 for instruction:   %shl
  ; AVX2: Found an estimated cost of 1 for instruction:   %shl
  
  
@@ -117,9 +117,9 @@ define <4 x i64> @test8(<4 x i64> %a) {
    ret <4 x i64> %shl
  }
  ; CHECK: 'Cost Model Analysis' for function 'test8':
-; SSE2: Found an estimated cost of 40 for instruction:   %shl
-; SSE41: Found an estimated cost of 40 for instruction:   %shl
-; AVX: Found an estimated cost of 40 for instruction:   %shl
+; SSE2: Found an estimated cost of 8 for instruction:   %shl
+; SSE41: Found an estimated cost of 8 for instruction:   %shl
+; AVX: Found an estimated cost of 8 for instruction:   %shl
  ; AVX2: Found an estimated cost of 1 for instruction:   %shl
  
  
@@ -159,9 +159,9 @@ define <8 x i64> @test11(<8 x i64> %a) {
    ret <8 x i64> %shl
  }
  ; CHECK: 'Cost Model Analysis' for function 'test11':
-; SSE2: Found an estimated cost of 80 for instruction:   %shl
-; SSE41: Found an estimated cost of 80 for instruction:   %shl
-; AVX: Found an estimated cost of 80 for instruction:   %shl
+; SSE2: Found an estimated cost of 16 for instruction:   %shl
+; SSE41: Found an estimated cost of 16 for instruction:   %shl
+; AVX: Found an estimated cost of 16 for instruction:   %shl
  ; AVX2: Found an estimated cost of 2 for instruction:   %shl
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sat, 18 Jul 2015 20:06:30 +0000 (20:06 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sat, 18 Jul 2015 20:06:30 +0000 (20:06 +0000)
lib/Target/X86/X86TargetTransformInfo.cpp		patch | blob | history
test/Analysis/CostModel/X86/testshiftlshr.ll		patch | blob | history
test/Analysis/CostModel/X86/testshiftshl.ll		patch | blob | history
test/Analysis/CostModel/X86/vshift-cost.ll		patch | blob | history