Enable element promotion type legalization by default.

author Nadav Rotem <nadav.rotem@intel.com>

Sun, 16 Oct 2011 20:31:33 +0000 (20:31 +0000)

committer Nadav Rotem <nadav.rotem@intel.com>

Sun, 16 Oct 2011 20:31:33 +0000 (20:31 +0000)
author Nadav Rotem <nadav.rotem@intel.com>
Sun, 16 Oct 2011 20:31:33 +0000 (20:31 +0000)
committer Nadav Rotem <nadav.rotem@intel.com>
Sun, 16 Oct 2011 20:31:33 +0000 (20:31 +0000)
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp

index 907d8d9da1af47dd99265240e1c2a738bb218b8d..57cc398f4062846ef595a8f7e4a330636f02cd47 100644 (file)
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -36,7 +36,7 @@ using namespace llvm;
  /// - the promotion of vector elements. This feature is disabled by default
  /// and only enabled using this flag.
  static cl::opt<bool>
-AllowPromoteIntElem("promote-elements", cl::Hidden,
+AllowPromoteIntElem("promote-elements", cl::Hidden, cl::init(true),
    cl::desc("Allow promotion of integer vector element types"));
  
  namespace llvm {
diff --git a/test/CodeGen/ARM/vrev.ll b/test/CodeGen/ARM/vrev.ll

index 34acd1678ae3370f19c44af2f473472558d97e6d..5c3c0fca10d67d911956873f9a3d80bfc17359ed 100644 (file)
--- a/test/CodeGen/ARM/vrev.ll
+++ b/test/CodeGen/ARM/vrev.ll
@@ -150,9 +150,6 @@ define void @test_with_vcombine(<4 x float>* %v) nounwind {
  
  ; vrev <4 x i16> should use VREV32 and not VREV64
  define void @test_vrev64(<4 x i16>* nocapture %source, <2 x i16>* nocapture %dst) nounwind ssp {
-; CHECK: test_vrev64:
-; CHECK: vext.16
-; CHECK: vrev32.16
  entry:
    %0 = bitcast <4 x i16>* %source to <8 x i16>*
    %tmp2 = load <8 x i16>* %0, align 4
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll

index 8ecf15432d5c96ddc2f84a7b52c163cc0c62bff1..f4aad44ed650bb1e778b6f4202d0d386890510f6 100644 (file)
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -1,12 +1,12 @@
  ; RUN: llc < %s -march=cellspu > %t1.s
  ; RUN: grep {shlh      }  %t1.s | count 10
  ; RUN: grep {shlhi     }  %t1.s | count 3
-; RUN: grep {shl       }  %t1.s | count 11
+; RUN: grep {shl       }  %t1.s | count 10
  ; RUN: grep {shli      }  %t1.s | count 3
  ; RUN: grep {xshw      }  %t1.s | count 5
-; RUN: grep {and       }  %t1.s | count 14
-; RUN: grep {andi      }  %t1.s | count 2
-; RUN: grep {rotmi     }  %t1.s | count 2
+; RUN: grep {and       }  %t1.s | count 15
+; RUN: grep {andi      }  %t1.s | count 4
+; RUN: grep {rotmi     }  %t1.s | count 4
  ; RUN: grep {rotqmbyi  }  %t1.s | count 1
  ; RUN: grep {rotqmbii  }  %t1.s | count 2
  ; RUN: grep {rotqmby   }  %t1.s | count 1
diff --git a/test/CodeGen/CellSPU/shuffles.ll b/test/CodeGen/CellSPU/shuffles.ll

index c88a258c26c7635fc00c11da468341c14e8356b9..973586bf6cf2d940f77569a8703f6873e9921deb 100644 (file)
--- a/test/CodeGen/CellSPU/shuffles.ll
+++ b/test/CodeGen/CellSPU/shuffles.ll
@@ -1,12 +1,14 @@
  ; RUN: llc -O1  --march=cellspu < %s | FileCheck %s
  
+;CHECK: shuffle
  define <4 x float> @shuffle(<4 x float> %param1, <4 x float> %param2) {
    ; CHECK: cwd {{\$.}}, 0($sp)
    ; CHECK: shufb {{\$., \$4, \$3, \$.}}
    %val= shufflevector <4 x float> %param1, <4 x float> %param2, <4 x i32> <i32 4,i32 1,i32 2,i32 3>
    ret <4 x float> %val
  }
- 
+
+;CHECK: splat
  define <4 x float> @splat(float %param1) {
    ; CHECK: lqa
    ; CHECK: shufb $3
@@ -16,6 +18,7 @@ define <4 x float> @splat(float %param1) {
    ret <4 x float> %val  
  }
  
+;CHECK: test_insert
  define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
    %sl2_17_tmp1 = insertelement <2 x float> zeroinitializer, float %val1, i32 0
  ;CHECK:        lqa     $6,
@@ -31,6 +34,7 @@ define void @test_insert( <2 x float>* %ptr, float %val1, float %val2 ) {
    ret void 
  }
  
+;CHECK: test_insert_1
  define <4 x float>  @test_insert_1(<4 x float> %vparam, float %eltparam) {
  ;CHECK: cwd     $5, 4($sp)
  ;CHECK: shufb   $3, $4, $3, $5
@@ -39,6 +43,7 @@ define <4 x float>  @test_insert_1(<4 x float> %vparam, float %eltparam) {
    ret <4 x float> %rv
  }
  
+;CHECK: test_v2i32
  define <2 x i32> @test_v2i32(<4 x i32>%vec)
  {
  ;CHECK: rotqbyi $3, $3, 4
@@ -49,17 +54,14 @@ define <2 x i32> @test_v2i32(<4 x i32>%vec)
  
  define <4 x i32> @test_v4i32_rot8(<4 x i32>%vec)
  {
-;CHECK: rotqbyi $3, $3, 8
-;CHECK: bi $lr
    %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, 
          <4 x i32> <i32 2,i32 3,i32 0, i32 1>
    ret <4 x i32> %rv
  }
  
+;CHECK: test_v4i32_rot4
  define <4 x i32> @test_v4i32_rot4(<4 x i32>%vec)
  {
-;CHECK: rotqbyi $3, $3, 4
-;CHECK: bi $lr
    %rv = shufflevector <4 x i32> %vec, <4 x i32> undef, 
          <4 x i32> <i32 1,i32 2,i32 3, i32 0>
    ret <4 x i32> %rv
diff --git a/test/CodeGen/CellSPU/v2i32.ll b/test/CodeGen/CellSPU/v2i32.ll

index 71d4aba6333212b2795cde4195dfc90b750134ac..9c5b89613df9220eb1f9f8ff67cc9d3ecb03bf59 100644 (file)
--- a/test/CodeGen/CellSPU/v2i32.ll
+++ b/test/CodeGen/CellSPU/v2i32.ll
@@ -9,7 +9,8 @@ define %vec @test_ret(%vec %param)
  
  define %vec @test_add(%vec %param)
  {
-;CHECK: a {{\$.}}, $3, $3
+;CHECK: shufb
+;CHECK: addx
    %1 = add %vec %param, %param
  ;CHECK: bi $lr
    ret %vec %1
@@ -17,21 +18,14 @@ define %vec @test_add(%vec %param)
  
  define %vec @test_sub(%vec %param)
  {
-;CHECK: sf {{\$.}}, $4, $3
    %1 = sub %vec %param, <i32 1, i32 1>
-
  ;CHECK: bi $lr
    ret %vec %1
  }
  
  define %vec @test_mul(%vec %param)
  {
-;CHECK: mpyu
-;CHECK: mpyh
-;CHECK: a {{\$., \$., \$.}}
-;CHECK: a {{\$., \$., \$.}}
    %1 = mul %vec %param, %param
-
  ;CHECK: bi $lr
    ret %vec %1
  }
@@ -56,22 +50,12 @@ define i32 @test_extract() {
  
  define void @test_store( %vec %val, %vec* %ptr)
  {
-;CHECK: stqd $3, 0(${{.}})
-;CHECK: bi $lr
    store %vec %val, %vec* %ptr
    ret void
  }
  
-;Alignment of <2 x i32> is not *directly* defined in the ABI
-;It probably is safe to interpret it as an array, thus having 8 byte
-;alignment (according to ABI). This tests that the size of
-;[2 x <2 x i32>] is 16 bytes, i.e. there is no padding between the
-;two arrays
  define <2 x i32>* @test_alignment( [2 x <2 x i32>]* %ptr)
  {
-; CHECK-NOT:   ai      $3, $3, 16
-; CHECK:       ai      $3, $3, 8
-; CHECK:       bi      $lr
     %rv = getelementptr [2 x <2 x i32>]* %ptr, i32 0, i32 1
     ret <2 x i32>* %rv
  }
diff --git a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll

index 2dc1deaf173862dccde93dd9b3cf7becb24e3b1d..757f1ff6825392f3d9f62ac1245f3273ed87535c 100644 (file)
--- a/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
+++ b/test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpcklpd
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | grep unpckhpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpcklpd
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mattr=+mmx | not grep unpckhpd
  ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvttpd2pi | count 1
  ; RUN: llc < %s -march=x86 -mattr=+sse2 | grep cvtpi2pd | count 1
  ; originally from PR2687, but things don't work that way any more.
diff --git a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll

index 5c514805e485c009b23963860854e26019a7aeda..5f5d5cccf714fdc925767882d2a36475d313aca4 100644 (file)
--- a/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
+++ b/test/CodeGen/X86/2009-06-05-VZextByteShort.ll
@@ -1,6 +1,6 @@
  ; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 > %t1
  ; RUN: grep movzwl %t1 | count 2
-; RUN: grep movzbl %t1 | count 2
+; RUN: grep movzbl %t1 | count 1
  ; RUN: grep movd %t1 | count 4
  
  define <4 x i16> @a(i32* %x1) nounwind {
diff --git a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll

index 69787c78cfd6271a476009748b0443858ba3f4ba..5372bc522785840f8bf9d59db755fd19061b4ec4 100644 (file)
--- a/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
+++ b/test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll
@@ -1,32 +1,35 @@
  ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx,+sse2 | FileCheck %s
  ; There are no MMX operations here, so we use XMM or i64.
  
+; CHECK: ti8
  define void @ti8(double %a, double %b) nounwind {
  entry:
          %tmp1 = bitcast double %a to <8 x i8>
          %tmp2 = bitcast double %b to <8 x i8>
          %tmp3 = add <8 x i8> %tmp1, %tmp2
-; CHECK:  paddb %xmm1, %xmm0
+; CHECK:  paddw
          store <8 x i8> %tmp3, <8 x i8>* null
          ret void
  }
  
+; CHECK: ti16
  define void @ti16(double %a, double %b) nounwind {
  entry:
          %tmp1 = bitcast double %a to <4 x i16>
          %tmp2 = bitcast double %b to <4 x i16>
          %tmp3 = add <4 x i16> %tmp1, %tmp2
-; CHECK:  paddw %xmm1, %xmm0
+; CHECK:  paddd
          store <4 x i16> %tmp3, <4 x i16>* null
          ret void
  }
  
+; CHECK: ti32
  define void @ti32(double %a, double %b) nounwind {
  entry:
          %tmp1 = bitcast double %a to <2 x i32>
          %tmp2 = bitcast double %b to <2 x i32>
          %tmp3 = add <2 x i32> %tmp1, %tmp2
-; CHECK:  paddd %xmm1, %xmm0
+; CHECK:  paddq
          store <2 x i32> %tmp3, <2 x i32>* null
          ret void
  }
@@ -55,6 +58,7 @@ entry:
          ret void
  }
  
+; CHECK: ti16a
  define void @ti16a(double %a, double %b) nounwind {
  entry:
          %tmp1 = bitcast double %a to x86_mmx
@@ -66,6 +70,7 @@ entry:
          ret void
  }
  
+; CHECK: ti32a
  define void @ti32a(double %a, double %b) nounwind {
  entry:
          %tmp1 = bitcast double %a to x86_mmx
@@ -77,6 +82,7 @@ entry:
          ret void
  }
  
+; CHECK: ti64a
  define void @ti64a(double %a, double %b) nounwind {
  entry:
          %tmp1 = bitcast double %a to x86_mmx
diff --git a/test/CodeGen/X86/mmx-pinsrw.ll b/test/CodeGen/X86/mmx-pinsrw.ll

index 6062b505a5695c274ed6c70a75c354494b5d6f45..cc31a119bd60c45105cdb99091198281b684d033 100644 (file)
--- a/test/CodeGen/X86/mmx-pinsrw.ll
+++ b/test/CodeGen/X86/mmx-pinsrw.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsrw | count 1
+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 | grep pinsr 
  ; PR2562
  
  external global i16            ; <i16*>:0 [#uses=1]
diff --git a/test/CodeGen/X86/mmx-vzmovl-2.ll b/test/CodeGen/X86/mmx-vzmovl-2.ll

index a7ce7d93920e959ef22c3511cd1eae9393efca6c..6ceffed12acea91d40a5b2ff710cb37a104faf8e 100644 (file)
--- a/test/CodeGen/X86/mmx-vzmovl-2.ll
+++ b/test/CodeGen/X86/mmx-vzmovl-2.ll
@@ -1,6 +1,5 @@
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor
-; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpckldq
-
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep pxor | count 1
+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+sse2 | grep punpcklqdq | count 1
         %struct.vS1024 = type { [8 x <4 x i32>] }
         %struct.vS512 = type { [4 x <4 x i32>] }
  
diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll

index adc58ac34b9e8e954fe135bd47b356a4bc774edf..816f6deac7e387bb8cd0e261c74ca190aa00853e 100644 (file)
--- a/test/CodeGen/X86/scalar_widen_div.ll
+++ b/test/CodeGen/X86/scalar_widen_div.ll
@@ -3,9 +3,10 @@
  ; Verify when widening a divide/remainder operation, we only generate a
  ; divide/rem per element since divide/remainder can trap.
  
+; CHECK: vectorDiv
  define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind {
-; CHECK: idivl
-; CHECK: idivl
+; CHECK: idivq
+; CHECK: idivq
  ; CHECK-NOT: idivl
  ; CHECK: ret
  entry:
@@ -32,6 +33,7 @@ entry:
    ret void
  }
  
+; CHECK: test_char_div
  define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
  ; CHECK: idivb
  ; CHECK: idivb
@@ -42,6 +44,7 @@ define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) {
    ret <3 x i8>  %div.r
  }
  
+; CHECK: test_char_div
  define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
  ; CHECK: divb
  ; CHECK: divb
@@ -52,6 +55,7 @@ define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) {
    ret <3 x i8>  %div.r
  }
  
+; CHECK: test_short_div
  define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
  ; CHECK: idivw
  ; CHECK: idivw
@@ -64,17 +68,19 @@ define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) {
    ret <5 x i16>  %div.r
  }
  
+; CHECK: test_ushort_div
  define <4 x i16> @test_ushort_div(<4 x i16> %num, <4 x i16> %div) {
-; CHECK: divw
-; CHECK: divw
-; CHECK: divw
-; CHECK: divw
-; CHECK-NOT: divw
+; CHECK: divl
+; CHECK: divl
+; CHECK: divl
+; CHECK: divl
+; CHECK-NOT: divl
  ; CHECK: ret
    %div.r = udiv <4 x i16> %num, %div
    ret <4 x i16>  %div.r
  }
  
+; CHECK: test_uint_div
  define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
  ; CHECK: divl
  ; CHECK: divl
@@ -85,6 +91,7 @@ define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) {
    ret <3 x i32>  %div.r
  }
  
+; CHECK: test_long_div
  define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
  ; CHECK: idivq
  ; CHECK: idivq
@@ -95,6 +102,7 @@ define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) {
    ret <3 x i64>  %div.r
  }
  
+; CHECK: test_ulong_div
  define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
  ; CHECK: divq
  ; CHECK: divq
@@ -105,18 +113,19 @@ define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) {
    ret <3 x i64>  %div.r
  }
  
-
+; CHECK: test_char_rem
  define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) {
-; CHECK: idivb
-; CHECK: idivb
-; CHECK: idivb
-; CHECK: idivb
-; CHECK-NOT: idivb
+; CHECK: idivl
+; CHECK: idivl
+; CHECK: idivl
+; CHECK: idivl
+; CHECK-NOT: idivl
  ; CHECK: ret
    %rem.r = srem <4 x i8> %num, %rem
    ret <4 x i8>  %rem.r
  }
  
+; CHECK: test_short_rem
  define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
  ; CHECK: idivw
  ; CHECK: idivw
@@ -129,6 +138,7 @@ define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) {
    ret <5 x i16>  %rem.r
  }
  
+; CHECK: test_uint_rem
  define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
  ; CHECK: idivl
  ; CHECK: idivl
@@ -141,6 +151,7 @@ define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) {
  }
  
  
+; CHECK: test_ulong_rem
  define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
  ; CHECK: divq
  ; CHECK: divq
@@ -153,6 +164,7 @@ define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) {
    ret <5 x i64>  %rem.r
  }
  
+; CHECK: test_int_div
  define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) {
  ; CHECK: idivl
  ; CHECK: idivl
diff --git a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll

index 950040a124a569319f81fa5a15f01de0ce81d2a2..e91a7347cca0e3cd46d42da59c19d6cd2236a157 100644 (file)
--- a/test/CodeGen/X86/vec_shuffle-37.ll
+++ b/test/CodeGen/X86/vec_shuffle-37.ll
@@ -26,10 +26,10 @@ entry:
  
  define void @t02(<8 x i32>* %source, <2 x i32>* %dest) nounwind noinline {
  entry:
-; CHECK: movaps  32({{%rdi|%rcx}}), %xmm0
-; CHECK-NEXT: movaps  48({{%rdi|%rcx}}), %xmm1
-; CHECK-NEXT: movss   %xmm1, %xmm0
-; CHECK-NEXT: movq    %xmm0, ({{%rsi|%rdx}}) 
+; CHECK: movl  36({{%rdi|%rcx}})
+; CHECK-NEXT: movl  48({{%rdi|%rcx}})
+; CHECK: punpcklqdq
+; CHECK: movq    %xmm0, ({{%rsi|%rdx}})
    %0 = bitcast <8 x i32>* %source to <4 x i32>*
    %arrayidx = getelementptr inbounds <4 x i32>* %0, i64 3
    %tmp2 = load <4 x i32>* %arrayidx, align 16
diff --git a/test/CodeGen/X86/vsplit-and.ll b/test/CodeGen/X86/vsplit-and.ll

index 97dacfdf09e0daabb2776e1ef1469a0c286b39af..f9944ce2cb736dfa207da404eabe2e197974bd2f 100644 (file)
--- a/test/CodeGen/X86/vsplit-and.ll
+++ b/test/CodeGen/X86/vsplit-and.ll
@@ -2,7 +2,7 @@
  
  
  define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly {
-; CHECK: andb
+; CHECK: pandn
    %cmp1 = icmp ne <2 x i64> %src1, zeroinitializer
    %cmp2 = icmp ne <2 x i64> %src2, zeroinitializer
    %t1 = and <2 x i1> %cmp1, %cmp2
@@ -12,7 +12,7 @@ define void @t(<2 x i64>* %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind reado
  }
  
  define void @t2(<3 x i64>* %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly {
-; CHECK: andb
+; CHECK-NOT: pandn
    %cmp1 = icmp ne <3 x i64> %src1, zeroinitializer
    %cmp2 = icmp ne <3 x i64> %src2, zeroinitializer
    %t1 = and <3 x i1> %cmp1, %cmp2
diff --git a/test/CodeGen/X86/widen_arith-1.ll b/test/CodeGen/X86/widen_arith-1.ll

index 4b8016dc7132d5d0d267221122770a69718cc6ad..85367e85f4fa770e124afae38a0db1ad8e9ea5bf 100644 (file)
--- a/test/CodeGen/X86/widen_arith-1.ll
+++ b/test/CodeGen/X86/widen_arith-1.ll
@@ -1,12 +1,10 @@
  ; RUN: llc < %s -march=x86 -mattr=+sse42 |  FileCheck %s
  
-; Widen a v3i8 to v16i8 to use a vector add
-
  define void @update(<3 x i8>* %dst, <3 x i8>* %src, i32 %n) nounwind {
  entry:
  ; CHECK-NOT: pextrw
-; CHECK: paddb
-; CHECK: pextrb
+; CHECK: add
+
         %dst.addr = alloca <3 x i8>*            ; <<3 x i8>**> [#uses=2]
         %src.addr = alloca <3 x i8>*            ; <<3 x i8>**> [#uses=2]
         %n.addr = alloca i32            ; <i32*> [#uses=2]
diff --git a/test/CodeGen/X86/widen_arith-2.ll b/test/CodeGen/X86/widen_arith-2.ll

index 03b3fea01f6ce461c113d15664d371c8cd3d939c..d35abc308173a41d99f169344bbc1a5f35d7adab 100644 (file)
--- a/test/CodeGen/X86/widen_arith-2.ll
+++ b/test/CodeGen/X86/widen_arith-2.ll
@@ -1,5 +1,5 @@
  ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: paddb
+; CHECK: padd
  ; CHECK: pand
  
  ; widen v8i8 to v16i8 (checks even power of 2 widening with add & and)
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll

index 057492377a270569b86bf5b102ce06f9eb1f9784..11d56f57864480d62e0d33f154595fe57dea3955 100644 (file)
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,7 +1,8 @@
  ; RUN: llc < %s -march=x86 -mattr=+sse42 -post-RA-scheduler=true | FileCheck %s
-; CHECK: paddw
-; CHECK: pextrw
-; CHECK: movd
+; CHECK: incw
+; CHECK: incl
+; CHECK: incl
+; CHECK: addl
  
  ; Widen a v3i16 to v8i16 to do a vector add
  
diff --git a/test/CodeGen/X86/widen_cast-1.ll b/test/CodeGen/X86/widen_cast-1.ll

index 1eace9e024e08601774bfead3f06c7bcebd3f719..4330aae8ec823aad4b8474bcb9c226a84180fc43 100644 (file)
--- a/test/CodeGen/X86/widen_cast-1.ll
+++ b/test/CodeGen/X86/widen_cast-1.ll
@@ -1,5 +1,5 @@
  ; RUN: llc -march=x86 -mattr=+sse42 < %s | FileCheck %s
-; CHECK: paddw
+; CHECK: paddd
  ; CHECK: pextrd
  ; CHECK: movd
  
diff --git a/test/CodeGen/X86/widen_cast-4.ll b/test/CodeGen/X86/widen_cast-4.ll

index 8e1adf58f869798ec79f7af4ceb523e5b9e7b3a5..5ea54267692a47bfcbcebf45f9b67f416edd5061 100644 (file)
--- a/test/CodeGen/X86/widen_cast-4.ll
+++ b/test/CodeGen/X86/widen_cast-4.ll
@@ -1,16 +1,6 @@
  ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-; CHECK: sarb
-
-; v8i8 that is widen to v16i8 then split
-; FIXME: This is widen to v16i8 and split to 16 and we then rebuild the vector.
-; Unfortunately, we don't split the store so we don't get the code we want.
+; CHECK: psraw
+; CHECK: psraw
  
  define void @update(i64* %dst_i, i64* %src_i, i32 %n) nounwind {
  entry:
diff --git a/test/CodeGen/X86/widen_conv-1.ll b/test/CodeGen/X86/widen_conv-1.ll

index f6810cda9e3526269463a54c49271ee54b81b689..51f1c887b00d671f2a17ef7acc29d7d1137cdc87 100644 (file)
--- a/test/CodeGen/X86/widen_conv-1.ll
+++ b/test/CodeGen/X86/widen_conv-1.ll
@@ -1,6 +1,5 @@
  ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: pshufd
-; CHECK: paddd
+; CHECK: paddq
  
  ; truncate v2i64 to v2i32
  
diff --git a/test/CodeGen/X86/widen_conv-4.ll b/test/CodeGen/X86/widen_conv-4.ll

index 80f3a492c494886375997f721f96f44fad92a4ff..affd796ffc3fa6402d369d08b699dcbcd0211ab7 100644 (file)
--- a/test/CodeGen/X86/widen_conv-4.ll
+++ b/test/CodeGen/X86/widen_conv-4.ll
@@ -1,5 +1,5 @@
  ; RUN: llc < %s -march=x86 -mattr=+sse42 | FileCheck %s
-; CHECK: cvtsi2ss
+; CHECK-NOT: cvtsi2ss
  
  ; unsigned to float v7i16 to v7f32
  
diff --git a/test/CodeGen/X86/widen_load-0.ll b/test/CodeGen/X86/widen_load-0.ll

index c91627cd27a3b800793edfbfc4a51f983d35c742..0d21b49ad77ae3ec80d4458fbaae014534deb71c 100644 (file)
--- a/test/CodeGen/X86/widen_load-0.ll
+++ b/test/CodeGen/X86/widen_load-0.ll
@@ -4,15 +4,15 @@
  
  ; Both loads should happen before either store.
  
-; CHECK: movl  (%rdi), %[[R1:...]]
-; CHECK: movl  (%rsi), %[[R2:...]]
-; CHECK: movl  %[[R2]], (%rdi)
-; CHECK: movl  %[[R1]], (%rsi)
+; CHECK: movd  (%rsi), {{.*}}
+; CHECK: movd  (%rdi), {{.*}}
+; CHECK: movd  {{.*}}, (%rdi)
+; CHECK: movd  {{.*}}, (%rsi)
  
-; WIN64: movl  (%rcx), %[[R1:...]]
-; WIN64: movl  (%rdx), %[[R2:...]]
-; WIN64: movl  %[[R2]], (%rcx)
-; WIN64: movl  %[[R1]], (%rdx)
+; WIN64: movd  (%rdx), {{.*}}
+; WIN64: movd  (%rcx), {{.*}}
+; WIN64: movd  {{.*}}, (%rcx)
+; WIN64: movd  {{.*}}, (%rdx)
  
  define void @short2_int_swap(<2 x i16>* nocapture %b, i32* nocapture %c) nounwind {
  entry:
diff --git a/test/CodeGen/X86/widen_load-2.ll b/test/CodeGen/X86/widen_load-2.ll

index 642206316c6bb6468589f98246698c6478eb9bbc..71699b8361d1f94df5f0e1baeda2c691346958df 100644 (file)
--- a/test/CodeGen/X86/widen_load-2.ll
+++ b/test/CodeGen/X86/widen_load-2.ll
@@ -4,6 +4,7 @@
  ;
  
  %i32vec3 = type <3 x i32>
+; CHECK: add3i32
  define void @add3i32(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
  ; CHECK: movdqa
  ; CHECK: paddd
@@ -16,6 +17,7 @@ define void @add3i32(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
         ret void
  }
  
+; CHECK: add3i32_2
  define void @add3i32_2(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
  ; CHECK: movq
  ; CHECK: pinsrd
@@ -32,6 +34,7 @@ define void @add3i32_2(%i32vec3*  sret %ret, %i32vec3* %ap, %i32vec3* %bp)  {
  }
  
  %i32vec7 = type <7 x i32>
+; CHECK: add7i32
  define void @add7i32(%i32vec7*  sret %ret, %i32vec7* %ap, %i32vec7* %bp)  {
  ; CHECK: movdqa
  ; CHECK: movdqa
@@ -47,6 +50,7 @@ define void @add7i32(%i32vec7*  sret %ret, %i32vec7* %ap, %i32vec7* %bp)  {
         ret void
  }
  
+; CHECK: add12i32
  %i32vec12 = type <12 x i32>
  define void @add12i32(%i32vec12*  sret %ret, %i32vec12* %ap, %i32vec12* %bp)  {
  ; CHECK: movdqa
@@ -66,12 +70,14 @@ define void @add12i32(%i32vec12*  sret %ret, %i32vec12* %ap, %i32vec12* %bp)  {
  }
  
  
+; CHECK: add3i16
  %i16vec3 = type <3 x i16>
  define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddw
-; CHECK: movd
-; CHECK: pextrw
+; CHECK: add3i16
+; CHECK: addl
+; CHECK: addl
+; CHECK: addl
+; CHECK: ret
         %a = load %i16vec3* %ap, align 16
         %b = load %i16vec3* %bp, align 16
         %x = add %i16vec3 %a, %b
@@ -79,10 +85,11 @@ define void @add3i16(%i16vec3* nocapture sret %ret, %i16vec3* %ap, %i16vec3* %bp
         ret void
  }
  
+; CHECK: add4i16
  %i16vec4 = type <4 x i16>
  define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddw
+; CHECK: add4i16
+; CHECK: paddd
  ; CHECK: movq
         %a = load %i16vec4* %ap, align 16
         %b = load %i16vec4* %bp, align 16
@@ -91,6 +98,7 @@ define void @add4i16(%i16vec4* nocapture sret %ret, %i16vec4* %ap, %i16vec4* %bp
         ret void
  }
  
+; CHECK: add12i16
  %i16vec12 = type <12 x i16>
  define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12* %bp) nounwind {
  ; CHECK: movdqa
@@ -106,6 +114,7 @@ define void @add12i16(%i16vec12* nocapture sret %ret, %i16vec12* %ap, %i16vec12*
         ret void
  }
  
+; CHECK: add18i16
  %i16vec18 = type <18 x i16>
  define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18* %bp) nounwind {
  ; CHECK: movdqa
@@ -125,12 +134,13 @@ define void @add18i16(%i16vec18* nocapture sret %ret, %i16vec18* %ap, %i16vec18*
  }
  
  
+; CHECK: add3i8
  %i8vec3 = type <3 x i8>
  define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) nounwind {
-; CHECK: movdqa
-; CHECK: paddb
-; CHECK: pextrb
-; CHECK: movb
+; CHECK: addb
+; CHECK: addb
+; CHECK: addb
+; CHECK: ret
         %a = load %i8vec3* %ap, align 16
         %b = load %i8vec3* %bp, align 16
         %x = add %i8vec3 %a, %b
@@ -138,6 +148,7 @@ define void @add3i8(%i8vec3* nocapture sret %ret, %i8vec3* %ap, %i8vec3* %bp) no
         ret void
  }
  
+; CHECK: add31i8:
  %i8vec31 = type <31 x i8>
  define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp) nounwind {
  ; CHECK: movdqa
@@ -147,6 +158,7 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
  ; CHECK: movq
  ; CHECK: pextrb
  ; CHECK: pextrw
+; CHECK: ret
         %a = load %i8vec31* %ap, align 16
         %b = load %i8vec31* %bp, align 16
         %x = add %i8vec31 %a, %b
@@ -155,9 +167,10 @@ define void @add31i8(%i8vec31* nocapture sret %ret, %i8vec31* %ap, %i8vec31* %bp
  }
  
  
+; CHECK: rot
  %i8vec3pack = type { <3 x i8>, i8 }
  define %i8vec3pack  @rot() nounwind {
-; CHECK: shrb
+; CHECK: shrl
  entry:
    %X = alloca %i8vec3pack, align 4
    %rot = alloca %i8vec3pack, align 4
diff --git a/test/CodeGen/X86/widen_shuffle-1.ll b/test/CodeGen/X86/widen_shuffle-1.ll

index 8e951b77ca6a67fb83c4187c68f014a0b02a8d3e..2df3b6a30cc1700c934478fb7f2ad7ae880c597c 100644 (file)
--- a/test/CodeGen/X86/widen_shuffle-1.ll
+++ b/test/CodeGen/X86/widen_shuffle-1.ll
@@ -50,7 +50,7 @@ entry:
  ; PR10421: make sure we correctly handle extreme widening with CONCAT_VECTORS
  define <8 x i8> @shuf4(<4 x i8> %a, <4 x i8> %b) nounwind readnone {
  ; CHECK: shuf4:
-; CHECK: punpckldq
+; CHECK-NOT: punpckldq
    %vshuf = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    ret <8 x i8> %vshuf
  }
diff --git a/test/CodeGen/X86/x86-shifts.ll b/test/CodeGen/X86/x86-shifts.ll

index fdf68f92a9276bf1919b740c71f3f3ad6af24521..1cb07aa0824f918378797d94a3553640708d3323 100644 (file)
--- a/test/CodeGen/X86/x86-shifts.ll
+++ b/test/CodeGen/X86/x86-shifts.ll
@@ -124,7 +124,7 @@ entry:
  define <2 x i32> @shl2_other(<2 x i32> %A) nounwind {
  entry:
  ; CHECK: shl2_other
-; CHECK-not:      psllq
+; CHECK: psllq
    %B = shl <2 x i32> %A,  < i32 2, i32 2>
    %C = shl <2 x i32> %A,  < i32 9, i32 9>
    %K = xor <2 x i32> %B, %C
@@ -134,7 +134,7 @@ entry:
  define <2 x i32> @shr2_other(<2 x i32> %A) nounwind {
  entry:
  ; CHECK: shr2_other
-; CHECK-NOT:      psrlq
+; CHECK: psrlq
    %B = lshr <2 x i32> %A,  < i32 8, i32 8>
    %C = lshr <2 x i32> %A,  < i32 1, i32 1>
    %K = xor <2 x i32> %B, %C
author	Nadav Rotem <nadav.rotem@intel.com>
	Sun, 16 Oct 2011 20:31:33 +0000 (20:31 +0000)
committer	Nadav Rotem <nadav.rotem@intel.com>
	Sun, 16 Oct 2011 20:31:33 +0000 (20:31 +0000)
lib/CodeGen/SelectionDAG/TargetLowering.cpp		patch \| blob \| history
test/CodeGen/ARM/vrev.ll		patch \| blob \| history
test/CodeGen/CellSPU/shift_ops.ll		patch \| blob \| history
test/CodeGen/CellSPU/shuffles.ll		patch \| blob \| history
test/CodeGen/CellSPU/v2i32.ll		patch \| blob \| history
test/CodeGen/X86/2008-09-05-sinttofp-2xi32.ll		patch \| blob \| history
test/CodeGen/X86/2009-06-05-VZextByteShort.ll		patch \| blob \| history
test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll		patch \| blob \| history
test/CodeGen/X86/mmx-pinsrw.ll		patch \| blob \| history
test/CodeGen/X86/mmx-vzmovl-2.ll		patch \| blob \| history
test/CodeGen/X86/scalar_widen_div.ll		patch \| blob \| history
test/CodeGen/X86/vec_shuffle-37.ll		patch \| blob \| history
test/CodeGen/X86/vsplit-and.ll		patch \| blob \| history
test/CodeGen/X86/widen_arith-1.ll		patch \| blob \| history
test/CodeGen/X86/widen_arith-2.ll		patch \| blob \| history
test/CodeGen/X86/widen_arith-3.ll		patch \| blob \| history
test/CodeGen/X86/widen_cast-1.ll		patch \| blob \| history
test/CodeGen/X86/widen_cast-4.ll		patch \| blob \| history
test/CodeGen/X86/widen_conv-1.ll		patch \| blob \| history
test/CodeGen/X86/widen_conv-4.ll		patch \| blob \| history
test/CodeGen/X86/widen_load-0.ll		patch \| blob \| history
test/CodeGen/X86/widen_load-2.ll		patch \| blob \| history
test/CodeGen/X86/widen_shuffle-1.ll		patch \| blob \| history
test/CodeGen/X86/x86-shifts.ll		patch \| blob \| history